change test_automations to use scrape_html

remove reliance on custom text_scraper class
changed url on test case to generic wildmode vs specific site
This commit is contained in:
smilerz
2024-04-12 09:20:56 -05:00
parent 7133249f4b
commit 1700b9c531
2 changed files with 59 additions and 50 deletions

View File

@@ -15,12 +15,9 @@ from cookbook.models import Automation, Keyword, PropertyType
def get_from_scraper(scrape, request):
# converting the scrape_me object to the existing json format based on ld+json
# converting the scrape_html object to the existing json format based on ld+json
recipe_json = {
'steps': [],
'internal': True
}
recipe_json = {'steps': [], 'internal': True}
keywords = []
# assign source URL
@@ -157,11 +154,18 @@ def get_from_scraper(scrape, request):
# assign steps
try:
for i in parse_instructions(scrape.instructions()):
recipe_json['steps'].append({'instruction': i, 'ingredients': [], 'show_ingredients_table': request.user.userpreference.show_step_ingredients, })
recipe_json['steps'].append({
'instruction': i,
'ingredients': [],
'show_ingredients_table': request.user.userpreference.show_step_ingredients,
})
except Exception:
pass
if len(recipe_json['steps']) == 0:
recipe_json['steps'].append({'instruction': '', 'ingredients': [], })
recipe_json['steps'].append({
'instruction': '',
'ingredients': [],
})
recipe_json['description'] = recipe_json['description'][:512]
if len(recipe_json['description']) > 256: # split at 256 as long descriptions don't look good on recipe cards
@@ -182,20 +186,20 @@ def get_from_scraper(scrape, request):
'original_text': x
}
if unit:
ingredient['unit'] = {'name': unit, }
ingredient['unit'] = {
'name': unit,
}
recipe_json['steps'][0]['ingredients'].append(ingredient)
except Exception:
recipe_json['steps'][0]['ingredients'].append(
{
'amount': 0,
'unit': None,
'food': {
'name': x,
},
'note': '',
'original_text': x
}
)
recipe_json['steps'][0]['ingredients'].append({
'amount': 0,
'unit': None,
'food': {
'name': x,
},
'note': '',
'original_text': x
})
except Exception:
pass
@@ -248,14 +252,16 @@ def get_from_youtube_scraper(url, request):
'working_time': 0,
'waiting_time': 0,
'image': "",
'keywords': [{'name': kw.name, 'label': kw.name, 'id': kw.pk}],
'keywords': [{
'name': kw.name,
'label': kw.name,
'id': kw.pk
}],
'source_url': url,
'steps': [
{
'ingredients': [],
'instruction': ''
}
]
'steps': [{
'ingredients': [],
'instruction': ''
}]
}
try:
@@ -452,10 +458,7 @@ def normalize_string(string):
def iso_duration_to_minutes(string):
match = re.match(
r'P((?P<years>\d+)Y)?((?P<months>\d+)M)?((?P<weeks>\d+)W)?((?P<days>\d+)D)?T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+)S)?',
string
).groupdict()
match = re.match(r'P((?P<years>\d+)Y)?((?P<months>\d+)M)?((?P<weeks>\d+)W)?((?P<days>\d+)D)?T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+)S)?', string).groupdict()
return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)

View File

@@ -4,10 +4,10 @@ import pytest
from django.contrib import auth
from django.test import RequestFactory
from django_scopes import scope
from recipe_scrapers import scrape_html
from cookbook.helper.automation_helper import AutomationEngine
from cookbook.helper.recipe_url_import import get_from_scraper
from cookbook.helper.scrapers.scrapers import text_scraper
from cookbook.models import Automation
DATA_DIR = "cookbook/tests/other/test_data/"
@@ -73,12 +73,14 @@ def test_unit_automation(u1_s1, arg):
assert (automation.apply_unit_automation(arg[0]) == target_name) is True
@pytest.mark.parametrize("arg", [
[[1, 'egg', 'white'], '', [1, '', 'egg', 'white']],
[[1, 'Egg', 'white'], '', [1, '', 'Egg', 'white']],
[[1, 'êgg', 'white'], '', [1, 'êgg', 'white']],
[[1, 'egg', 'white'], 'whole', [1, 'whole', 'egg', 'white']],
])
@pytest.mark.parametrize(
"arg", [
[[1, 'egg', 'white'], '', [1, '', 'egg', 'white']],
[[1, 'Egg', 'white'], '', [1, '', 'Egg', 'white']],
[[1, 'êgg', 'white'], '', [1, 'êgg', 'white']],
[[1, 'egg', 'white'], 'whole', [1, 'whole', 'egg', 'white']],
]
)
def test_never_unit_automation(u1_s1, arg):
user = auth.get_user(u1_s1)
space = user.userspace_set.first().space
@@ -97,13 +99,15 @@ def test_never_unit_automation(u1_s1, arg):
['.*allrecipes.*', True],
['.*google.*', False],
])
@pytest.mark.parametrize("arg", [
[Automation.DESCRIPTION_REPLACE],
[Automation.INSTRUCTION_REPLACE],
[Automation.NAME_REPLACE],
[Automation.FOOD_REPLACE],
[Automation.UNIT_REPLACE],
])
@pytest.mark.parametrize(
"arg", [
[Automation.DESCRIPTION_REPLACE],
[Automation.INSTRUCTION_REPLACE],
[Automation.NAME_REPLACE],
[Automation.FOOD_REPLACE],
[Automation.UNIT_REPLACE],
]
)
def test_regex_automation(u1_s1, arg, source):
user = auth.get_user(u1_s1)
space = user.userspace_set.first().space
@@ -124,11 +128,13 @@ def test_regex_automation(u1_s1, arg, source):
assert (automation.apply_regex_replace_automation(fail, arg[0]) == target) == False
@pytest.mark.parametrize("arg", [
['second first', 'first second'],
['longer string second first longer string', 'longer string first second longer string'],
['second fails first', 'second fails first'],
])
@pytest.mark.parametrize(
"arg", [
['second first', 'first second'],
['longer string second first longer string', 'longer string first second longer string'],
['second fails first', 'second fails first'],
]
)
def test_transpose_automation(u1_s1, arg):
user = auth.get_user(u1_s1)
space = user.userspace_set.first().space
@@ -160,7 +166,7 @@ def test_url_import_regex_replace(u1_s1):
else:
test_file = os.path.join(os.getcwd(), 'cookbook', 'tests', 'other', 'test_data', recipe)
with open(test_file, 'r', encoding='UTF-8') as d:
scrape = text_scraper(text=d.read(), url="https://www.allrecipes.com")
scrape = scrape_html(html=d.read(), org_url="https://testrecipe.test", supported_only=False)
with scope(space=space):
for t in types:
Automation.objects.get_or_create(name=t, type=t, param_1='.*', param_2=find_text, param_3='', created_by=user, space=space)