Improve website parser: strip ld+json newlines, return error response

2026-01-01 04:10:06 -05:00 · 2020-08-26 11:37:59 +02:00
parent 78be002134
commit 0b948618f3
2 changed files with 4 additions and 4 deletions
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@@ -18,7 +18,7 @@ def get_from_html(html_text, url):
    # first try finding ld+json as its most common
    for ld in soup.find_all('script', type='application/ld+json'):
        try:
-            ld_json = json.loads(ld.string)
+            ld_json = json.loads(ld.string.replace('\n', ''))
            if type(ld_json) != list:
                ld_json = [ld_json]

@@ -31,8 +31,8 @@ def get_from_html(html_text, url):

                if '@type' in ld_json_item and ld_json_item['@type'] == 'Recipe':
                    return find_recipe_json(ld_json_item, url)
-        except JSONDecodeError:
-            JsonResponse({'error': True, 'msg': _('The requested site provided malformed data and cannot be read.')}, status=400)
+        except JSONDecodeError as e:
+            return JsonResponse({'error': True, 'msg': _('The requested site provided malformed data and cannot be read.')}, status=400)

    # now try to find microdata
    items = microdata.get_items(html_text)