From 1700b9c5313df4f9b53cd11a8396cba00d8cc1de Mon Sep 17 00:00:00 2001
From: smilerz
Date: Fri, 12 Apr 2024 09:20:56 -0500
Subject: [PATCH] change test_automations to use scrape_html

remove reliance on the custom text_scraper class
change the URL in the test case to a generic wild-mode URL instead of a specific site
---
 cookbook/helper/recipe_url_import.py     | 63 +++++++++++++-----------
 cookbook/tests/other/test_automations.py | 46 +++++++++--------
 2 files changed, 59 insertions(+), 50 deletions(-)

diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py
index aa0bb2d87..eca6813aa 100644
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@@ -15,12 +15,9 @@ from cookbook.models import Automation, Keyword, PropertyType
 
 
 def get_from_scraper(scrape, request):
-    # converting the scrape_me object to the existing json format based on ld+json
+    # converting the scrape_html object to the existing json format based on ld+json
 
-    recipe_json = {
-        'steps': [],
-        'internal': True
-    }
+    recipe_json = {'steps': [], 'internal': True}
     keywords = []
 
     # assign source URL
@@ -157,11 +154,18 @@ def get_from_scraper(scrape, request):
     # assign steps
     try:
         for i in parse_instructions(scrape.instructions()):
-            recipe_json['steps'].append({'instruction': i, 'ingredients': [], 'show_ingredients_table': request.user.userpreference.show_step_ingredients, })
+            recipe_json['steps'].append({
+                'instruction': i,
+                'ingredients': [],
+                'show_ingredients_table': request.user.userpreference.show_step_ingredients,
+            })
     except Exception:
         pass
     if len(recipe_json['steps']) == 0:
-        recipe_json['steps'].append({'instruction': '', 'ingredients': [], })
+        recipe_json['steps'].append({
+            'instruction': '',
+            'ingredients': [],
+        })
 
     recipe_json['description'] = recipe_json['description'][:512]
     if len(recipe_json['description']) > 256:  # split at 256 as long descriptions don't look good on recipe cards
@@ -182,20 +186,20 @@ def get_from_scraper(scrape, request):
                         'original_text': x
                     }
                     if unit:
-                        ingredient['unit'] = {'name': unit, }
+                        ingredient['unit'] = {
+                            'name': unit,
+                        }
                     recipe_json['steps'][0]['ingredients'].append(ingredient)
                 except Exception:
-                    recipe_json['steps'][0]['ingredients'].append(
-                        {
-                            'amount': 0,
-                            'unit': None,
-                            'food': {
-                                'name': x,
-                            },
-                            'note': '',
-                            'original_text': x
-                        }
-                    )
+                    recipe_json['steps'][0]['ingredients'].append({
+                        'amount': 0,
+                        'unit': None,
+                        'food': {
+                            'name': x,
+                        },
+                        'note': '',
+                        'original_text': x
+                    })
     except Exception:
         pass
 
@@ -248,14 +252,16 @@ def get_from_youtube_scraper(url, request):
         'working_time': 0,
         'waiting_time': 0,
         'image': "",
-        'keywords': [{'name': kw.name, 'label': kw.name, 'id': kw.pk}],
+        'keywords': [{
+            'name': kw.name,
+            'label': kw.name,
+            'id': kw.pk
+        }],
         'source_url': url,
-        'steps': [
-            {
-                'ingredients': [],
-                'instruction': ''
-            }
-        ]
+        'steps': [{
+            'ingredients': [],
+            'instruction': ''
+        }]
     }
 
     try:
@@ -452,10 +458,7 @@ def normalize_string(string):
 
 
 def iso_duration_to_minutes(string):
-    match = re.match(
-        r'P((?P<years>\d+)Y)?((?P<months>\d+)M)?((?P<weeks>\d+)W)?((?P<days>\d+)D)?T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+)S)?',
-        string
-    ).groupdict()
+    match = re.match(r'P((?P<years>\d+)Y)?((?P<months>\d+)M)?((?P<weeks>\d+)W)?((?P<days>\d+)D)?T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+)S)?', string).groupdict()
     return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)
 
 
diff --git a/cookbook/tests/other/test_automations.py b/cookbook/tests/other/test_automations.py
index 8b7b5568f..810a73f74 100644
--- a/cookbook/tests/other/test_automations.py
+++ b/cookbook/tests/other/test_automations.py
@@ -4,10 +4,10 @@ import pytest
 from django.contrib import auth
 from django.test import RequestFactory
 from django_scopes import scope
+from recipe_scrapers import scrape_html
 
 from cookbook.helper.automation_helper import AutomationEngine
 from cookbook.helper.recipe_url_import import get_from_scraper
-from cookbook.helper.scrapers.scrapers import text_scraper
 from cookbook.models import Automation
 
 DATA_DIR = "cookbook/tests/other/test_data/"
@@ -73,12 +73,14 @@ def test_unit_automation(u1_s1, arg):
         assert (automation.apply_unit_automation(arg[0]) == target_name) is True
 
 
-@pytest.mark.parametrize("arg", [
-    [[1, 'egg', 'white'], '', [1, '', 'egg', 'white']],
-    [[1, 'Egg', 'white'], '', [1, '', 'Egg', 'white']],
-    [[1, 'êgg', 'white'], '', [1, 'êgg', 'white']],
-    [[1, 'egg', 'white'], 'whole', [1, 'whole', 'egg', 'white']],
-])
+@pytest.mark.parametrize(
+    "arg", [
+        [[1, 'egg', 'white'], '', [1, '', 'egg', 'white']],
+        [[1, 'Egg', 'white'], '', [1, '', 'Egg', 'white']],
+        [[1, 'êgg', 'white'], '', [1, 'êgg', 'white']],
+        [[1, 'egg', 'white'], 'whole', [1, 'whole', 'egg', 'white']],
+    ]
+)
 def test_never_unit_automation(u1_s1, arg):
     user = auth.get_user(u1_s1)
     space = user.userspace_set.first().space
@@ -97,13 +99,15 @@
     ['.*allrecipes.*', True],
     ['.*google.*', False],
 ])
-@pytest.mark.parametrize("arg", [
-    [Automation.DESCRIPTION_REPLACE],
-    [Automation.INSTRUCTION_REPLACE],
-    [Automation.NAME_REPLACE],
-    [Automation.FOOD_REPLACE],
-    [Automation.UNIT_REPLACE],
-])
+@pytest.mark.parametrize(
+    "arg", [
+        [Automation.DESCRIPTION_REPLACE],
+        [Automation.INSTRUCTION_REPLACE],
+        [Automation.NAME_REPLACE],
+        [Automation.FOOD_REPLACE],
+        [Automation.UNIT_REPLACE],
+    ]
+)
 def test_regex_automation(u1_s1, arg, source):
     user = auth.get_user(u1_s1)
     space = user.userspace_set.first().space
@@ -124,11 +128,13 @@ def test_regex_automation(u1_s1, arg, source):
         assert (automation.apply_regex_replace_automation(fail, arg[0]) == target) == False
 
 
-@pytest.mark.parametrize("arg", [
-    ['second first', 'first second'],
-    ['longer string second first longer string', 'longer string first second longer string'],
-    ['second fails first', 'second fails first'],
-])
+@pytest.mark.parametrize(
+    "arg", [
+        ['second first', 'first second'],
+        ['longer string second first longer string', 'longer string first second longer string'],
+        ['second fails first', 'second fails first'],
+    ]
+)
 def test_transpose_automation(u1_s1, arg):
     user = auth.get_user(u1_s1)
     space = user.userspace_set.first().space
@@ -160,7 +166,7 @@ def test_url_import_regex_replace(u1_s1):
     else:
         test_file = os.path.join(os.getcwd(), 'cookbook', 'tests', 'other', 'test_data', recipe)
     with open(test_file, 'r', encoding='UTF-8') as d:
-        scrape = text_scraper(text=d.read(), url="https://www.allrecipes.com")
+        scrape = scrape_html(html=d.read(), org_url="https://testrecipe.test", supported_only=False)
     with scope(space=space):
         for t in types:
             Automation.objects.get_or_create(name=t, type=t, param_1='.*', param_2=find_text, param_3='', created_by=user, space=space)
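
Note (not part of the patch): the updated test builds its scraper with recipe_scrapers.scrape_html instead of the removed custom text_scraper helper. The sketch below shows that pattern in isolation, under a couple of assumptions: the fixture filename is a placeholder (the real tests read HTML files from cookbook/tests/other/test_data/), and supported_only=False enables the generic wild-mode fallback, which is why an arbitrary URL such as https://testrecipe.test is acceptable.

from recipe_scrapers import scrape_html

# Placeholder fixture name; the tests load files from cookbook/tests/other/test_data/.
with open('recipe_fixture.html', 'r', encoding='UTF-8') as d:
    # supported_only=False falls back to generic schema.org/ld+json parsing when the
    # origin URL has no dedicated scraper, so a made-up domain works here.
    scrape = scrape_html(html=d.read(), org_url='https://testrecipe.test', supported_only=False)

print(scrape.title())         # parsed from the recipe markup in the fixture
print(scrape.instructions())  # same accessor the import code uses via scrape.instructions()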
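
Note (not part of the patch): a standalone sanity check of the iso_duration_to_minutes() helper that the patch reflows above; the sample ISO 8601 durations are illustrative.

import re


def iso_duration_to_minutes(string):
    # Named groups follow the ISO 8601 duration designators; only days, hours
    # and minutes feed the conversion below.
    match = re.match(
        r'P((?P<years>\d+)Y)?((?P<months>\d+)M)?((?P<weeks>\d+)W)?((?P<days>\d+)D)?'
        r'T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+)S)?', string).groupdict()
    return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)


print(iso_duration_to_minutes('PT1H30M'))  # 90
print(iso_duration_to_minutes('P1DT2H'))   # 1560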