From 1700b9c5313df4f9b53cd11a8396cba00d8cc1de Mon Sep 17 00:00:00 2001
From: smilerz
Date: Fri, 12 Apr 2024 09:20:56 -0500
Subject: [PATCH] change test_automations to use scrape_html

remove reliance on the custom text_scraper class
change the URL in the test case to a generic wild-mode URL instead of a specific site
---
 cookbook/helper/recipe_url_import.py     | 63 +++++++++++++-----------
 cookbook/tests/other/test_automations.py | 46 +++++++++--------
 2 files changed, 59 insertions(+), 50 deletions(-)

diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py
index aa0bb2d87..eca6813aa 100644
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@@ -15,12 +15,9 @@ from cookbook.models import Automation, Keyword, PropertyType
 
 
 def get_from_scraper(scrape, request):
-    # converting the scrape_me object to the existing json format based on ld+json
+    # converting the scrape_html object to the existing json format based on ld+json
 
-    recipe_json = {
-        'steps': [],
-        'internal': True
-    }
+    recipe_json = {'steps': [], 'internal': True}
     keywords = []
 
     # assign source URL
@@ -157,11 +154,18 @@ def get_from_scraper(scrape, request):
     # assign steps
     try:
         for i in parse_instructions(scrape.instructions()):
-            recipe_json['steps'].append({'instruction': i, 'ingredients': [], 'show_ingredients_table': request.user.userpreference.show_step_ingredients, })
+            recipe_json['steps'].append({
+                'instruction': i,
+                'ingredients': [],
+                'show_ingredients_table': request.user.userpreference.show_step_ingredients,
+            })
     except Exception:
         pass
     if len(recipe_json['steps']) == 0:
-        recipe_json['steps'].append({'instruction': '', 'ingredients': [], })
+        recipe_json['steps'].append({
+            'instruction': '',
+            'ingredients': [],
+        })
 
     recipe_json['description'] = recipe_json['description'][:512]
     if len(recipe_json['description']) > 256:  # split at 256 as long descriptions don't look good on recipe cards
@@ -182,20 +186,20 @@ def get_from_scraper(scrape, request):
                         'original_text': x
                     }
                     if unit:
-                        ingredient['unit'] = {'name': unit, }
+                        ingredient['unit'] = {
+                            'name': unit,
+                        }
                     recipe_json['steps'][0]['ingredients'].append(ingredient)
                 except Exception:
-                    recipe_json['steps'][0]['ingredients'].append(
-                        {
-                            'amount': 0,
-                            'unit': None,
-                            'food': {
-                                'name': x,
-                            },
-                            'note': '',
-                            'original_text': x
-                        }
-                    )
+                    recipe_json['steps'][0]['ingredients'].append({
+                        'amount': 0,
+                        'unit': None,
+                        'food': {
+                            'name': x,
+                        },
+                        'note': '',
+                        'original_text': x
+                    })
     except Exception:
         pass
 
@@ -248,14 +252,16 @@ def get_from_youtube_scraper(url, request):
         'working_time': 0,
         'waiting_time': 0,
         'image': "",
-        'keywords': [{'name': kw.name, 'label': kw.name, 'id': kw.pk}],
+        'keywords': [{
+            'name': kw.name,
+            'label': kw.name,
+            'id': kw.pk
+        }],
         'source_url': url,
-        'steps': [
-            {
-                'ingredients': [],
-                'instruction': ''
-            }
-        ]
+        'steps': [{
+            'ingredients': [],
+            'instruction': ''
+        }]
     }
 
     try:
@@ -452,10 +458,7 @@ def normalize_string(string):
 
 
 def iso_duration_to_minutes(string):
-    match = re.match(
-        r'P((?P<years>\d+)Y)?((?P<months>\d+)M)?((?P<weeks>\d+)W)?((?P<days>\d+)D)?T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+)S)?',
-        string
-    ).groupdict()
+    match = re.match(r'P((?P<years>\d+)Y)?((?P<months>\d+)M)?((?P<weeks>\d+)W)?((?P<days>\d+)D)?T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+)S)?', string).groupdict()
     return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)
 
 
diff --git a/cookbook/tests/other/test_automations.py b/cookbook/tests/other/test_automations.py
index 8b7b5568f..810a73f74 100644
--- a/cookbook/tests/other/test_automations.py
+++ b/cookbook/tests/other/test_automations.py
@@ -4,10 +4,10 @@ import pytest
 from django.contrib import auth
 from django.test import RequestFactory
 from django_scopes import scope
+from recipe_scrapers import scrape_html
 
 from cookbook.helper.automation_helper import AutomationEngine
 from cookbook.helper.recipe_url_import import get_from_scraper
-from cookbook.helper.scrapers.scrapers import text_scraper
 from cookbook.models import Automation
 
 DATA_DIR = "cookbook/tests/other/test_data/"
@@ -73,12 +73,14 @@ def test_unit_automation(u1_s1, arg):
         assert (automation.apply_unit_automation(arg[0]) == target_name) is True
 
 
-@pytest.mark.parametrize("arg", [
-    [[1, 'egg', 'white'], '', [1, '', 'egg', 'white']],
-    [[1, 'Egg', 'white'], '', [1, '', 'Egg', 'white']],
-    [[1, 'êgg', 'white'], '', [1, 'êgg', 'white']],
-    [[1, 'egg', 'white'], 'whole', [1, 'whole', 'egg', 'white']],
-])
+@pytest.mark.parametrize(
+    "arg", [
+        [[1, 'egg', 'white'], '', [1, '', 'egg', 'white']],
+        [[1, 'Egg', 'white'], '', [1, '', 'Egg', 'white']],
+        [[1, 'êgg', 'white'], '', [1, 'êgg', 'white']],
+        [[1, 'egg', 'white'], 'whole', [1, 'whole', 'egg', 'white']],
+    ]
+)
 def test_never_unit_automation(u1_s1, arg):
     user = auth.get_user(u1_s1)
     space = user.userspace_set.first().space
@@ -97,13 +99,15 @@
     ['.*allrecipes.*', True],
     ['.*google.*', False],
 ])
-@pytest.mark.parametrize("arg", [
-    [Automation.DESCRIPTION_REPLACE],
-    [Automation.INSTRUCTION_REPLACE],
-    [Automation.NAME_REPLACE],
-    [Automation.FOOD_REPLACE],
-    [Automation.UNIT_REPLACE],
-])
+@pytest.mark.parametrize(
+    "arg", [
+        [Automation.DESCRIPTION_REPLACE],
+        [Automation.INSTRUCTION_REPLACE],
+        [Automation.NAME_REPLACE],
+        [Automation.FOOD_REPLACE],
+        [Automation.UNIT_REPLACE],
+    ]
+)
 def test_regex_automation(u1_s1, arg, source):
     user = auth.get_user(u1_s1)
     space = user.userspace_set.first().space
@@ -124,11 +128,13 @@ def test_regex_automation(u1_s1, arg, source):
         assert (automation.apply_regex_replace_automation(fail, arg[0]) == target) == False
 
 
-@pytest.mark.parametrize("arg", [
-    ['second first', 'first second'],
-    ['longer string second first longer string', 'longer string first second longer string'],
-    ['second fails first', 'second fails first'],
-])
+@pytest.mark.parametrize(
+    "arg", [
+        ['second first', 'first second'],
+        ['longer string second first longer string', 'longer string first second longer string'],
+        ['second fails first', 'second fails first'],
+    ]
+)
 def test_transpose_automation(u1_s1, arg):
     user = auth.get_user(u1_s1)
     space = user.userspace_set.first().space
@@ -160,7 +166,7 @@ def test_url_import_regex_replace(u1_s1):
     else:
         test_file = os.path.join(os.getcwd(), 'cookbook', 'tests', 'other', 'test_data', recipe)
     with open(test_file, 'r', encoding='UTF-8') as d:
-        scrape = text_scraper(text=d.read(), url="https://www.allrecipes.com")
+        scrape = scrape_html(html=d.read(), org_url="https://testrecipe.test", supported_only=False)
     with scope(space=space):
         for t in types:
             Automation.objects.get_or_create(name=t, type=t, param_1='.*', param_2=find_text, param_3='', created_by=user, space=space)
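
Note (not part of the patch): the updated test builds its scraper with recipe_scrapers.scrape_html instead of the removed custom text_scraper helper. The sketch below shows that pattern in isolation, under a couple of assumptions: the fixture filename is a placeholder (the real tests read HTML files from cookbook/tests/other/test_data/), and supported_only=False enables the generic wild-mode fallback, which is why an arbitrary URL such as https://testrecipe.test is acceptable.

from recipe_scrapers import scrape_html

# Placeholder fixture name; the tests load files from cookbook/tests/other/test_data/.
with open('recipe_fixture.html', 'r', encoding='UTF-8') as d:
    # supported_only=False falls back to generic schema.org/ld+json parsing when the
    # origin URL has no dedicated scraper, so a made-up domain works here.
    scrape = scrape_html(html=d.read(), org_url='https://testrecipe.test', supported_only=False)

print(scrape.title())         # parsed from the recipe markup in the fixture
print(scrape.instructions())  # same accessor the import code uses via scrape.instructions()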
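
Note (not part of the patch): a standalone sanity check of the iso_duration_to_minutes() helper that the patch reflows above; the sample ISO 8601 durations are illustrative.

import re


def iso_duration_to_minutes(string):
    # Named groups follow the ISO 8601 duration designators; only days, hours
    # and minutes feed the conversion below.
    match = re.match(
        r'P((?P<years>\d+)Y)?((?P<months>\d+)M)?((?P<weeks>\d+)W)?((?P<days>\d+)D)?'
        r'T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+)S)?', string).groupdict()
    return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)


print(iso_duration_to_minutes('PT1H30M'))  # 90
print(iso_duration_to_minutes('P1DT2H'))   # 1560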