From 0b948618f33827836db35eb9ee0395f252b5e115 Mon Sep 17 00:00:00 2001 From: vabene1111 Date: Wed, 26 Aug 2020 11:37:59 +0200 Subject: [PATCH] improved website parser --- cookbook/helper/recipe_url_import.py | 6 +++--- cookbook/tests/other/test_edits_recipe.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py index 94e352ca1..a8dd84b4e 100644 --- a/cookbook/helper/recipe_url_import.py +++ b/cookbook/helper/recipe_url_import.py @@ -18,7 +18,7 @@ def get_from_html(html_text, url): # first try finding ld+json as its most common for ld in soup.find_all('script', type='application/ld+json'): try: - ld_json = json.loads(ld.string) + ld_json = json.loads(ld.string.replace('\n', '')) if type(ld_json) != list: ld_json = [ld_json] @@ -31,8 +31,8 @@ def get_from_html(html_text, url): if '@type' in ld_json_item and ld_json_item['@type'] == 'Recipe': return find_recipe_json(ld_json_item, url) - except JSONDecodeError: - JsonResponse({'error': True, 'msg': _('The requested site provided malformed data and cannot be read.')}, status=400) + except JSONDecodeError as e: + return JsonResponse({'error': True, 'msg': _('The requested site provided malformed data and cannot be read.')}, status=400) # now try to find microdata items = microdata.get_items(html_text) diff --git a/cookbook/tests/other/test_edits_recipe.py b/cookbook/tests/other/test_edits_recipe.py index 97dcdfcaa..0ad081a87 100644 --- a/cookbook/tests/other/test_edits_recipe.py +++ b/cookbook/tests/other/test_edits_recipe.py @@ -12,7 +12,7 @@ class TestEditsRecipe(TestBase): {'file': 'cookbook/tests/resources/websites/ld_json_2.html', 'result_length': 1450}, {'file': 'cookbook/tests/resources/websites/ld_json_3.html', 'result_length': 1545}, {'file': 'cookbook/tests/resources/websites/ld_json_4.html', 'result_length': 1657}, - {'file': 'cookbook/tests/resources/websites/ld_json_invalid.html', 'result_length': 115}, + {'file': 'cookbook/tests/resources/websites/ld_json_invalid.html', 'result_length': 88}, {'file': 'cookbook/tests/resources/websites/ld_json_itemList.html', 'result_length': 3131}, {'file': 'cookbook/tests/resources/websites/ld_json_multiple.html', 'result_length': 1546}, {'file': 'cookbook/tests/resources/websites/micro_data_1.html', 'result_length': 1022},