Merge branch 'develop' of https://github.com/TandoorRecipes/recipes into develop

Kaibu
2022-05-11 17:10:18 +02:00
28 changed files with 71 additions and 50 deletions

View File

@@ -9,6 +9,8 @@ from recipe_scrapers._utils import get_host_name, normalize_string
 from cookbook.helper import recipe_url_import as helper
 from cookbook.helper.scrapers.scrapers import text_scraper
+from recipe_scrapers import scrape_me
+from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
 
 def get_recipe_from_source(text, url, request):
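
Both added imports come from the recipe_scrapers package this module already depends on. A minimal sketch of the wild-mode call they enable, with a hypothetical helper name (wild_mode asks recipe_scrapers to fall back to generic schema.org parsing when no site-specific scraper matches the URL):

    from recipe_scrapers import scrape_me
    from recipe_scrapers._exceptions import NoSchemaFoundInWildMode

    def try_wild_scrape(url):
        # Returns a scraper on success, None when the page exposes no
        # recognizable schema.org recipe markup.
        try:
            return scrape_me(url_path=url, wild_mode=True)
        except NoSchemaFoundInWildMode:
            return None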
@@ -63,34 +65,41 @@ def get_recipe_from_source(text, url, request):
     html_data = []
     images = []
     text = unquote(text)
-    try:
-        parse_list.append(remove_graph(json.loads(text)))
-        if not url and 'url' in parse_list[0]:
-            url = parse_list[0]['url']
-        scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
-    except JSONDecodeError:
-        soup = BeautifulSoup(text, "html.parser")
-        html_data = get_from_html(soup)
-        images += get_images_from_source(soup, url)
-        for el in soup.find_all('script', type='application/ld+json'):
-            el = remove_graph(el)
-            if not url and 'url' in el:
-                url = el['url']
-            if type(el) == list:
-                for le in el:
-                    parse_list.append(le)
-            elif type(el) == dict:
-                parse_list.append(el)
-        for el in soup.find_all(type='application/json'):
-            el = remove_graph(el)
-            if type(el) == list:
-                for le in el:
-                    parse_list.append(le)
-            elif type(el) == dict:
-                parse_list.append(el)
-        scrape = text_scraper(text, url=url)
+    scrape = None
+    if url:
+        try:
+            scrape = scrape_me(url_path=url, wild_mode=True)
+        except(NoSchemaFoundInWildMode):
+            pass
+    if not scrape:
+        try:
+            parse_list.append(remove_graph(json.loads(text)))
+            if not url and 'url' in parse_list[0]:
+                url = parse_list[0]['url']
+            scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
+        except JSONDecodeError:
+            soup = BeautifulSoup(text, "html.parser")
+            html_data = get_from_html(soup)
+            images += get_images_from_source(soup, url)
+            for el in soup.find_all('script', type='application/ld+json'):
+                el = remove_graph(el)
+                if not url and 'url' in el:
+                    url = el['url']
+                if type(el) == list:
+                    for le in el:
+                        parse_list.append(le)
+                elif type(el) == dict:
+                    parse_list.append(el)
+            for el in soup.find_all(type='application/json'):
+                el = remove_graph(el)
+                if type(el) == list:
+                    for le in el:
+                        parse_list.append(le)
+                elif type(el) == dict:
+                    parse_list.append(el)
+            scrape = text_scraper(text, url=url)
 
     recipe_json = helper.get_from_scraper(scrape, request)
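
The JSONDecodeError branch treats the input as HTML and harvests every application/ld+json block it can find. The extraction step in isolation, with made-up markup:

    import json
    from bs4 import BeautifulSoup

    html = '<script type="application/ld+json">{"@type": "Recipe", "name": "Soup"}</script>'
    soup = BeautifulSoup(html, "html.parser")
    for el in soup.find_all('script', type='application/ld+json'):
        data = json.loads(el.string)  # each matching tag wraps standalone JSON
        print(data['name'])           # -> Soup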

View File

@@ -114,7 +114,14 @@ def get_from_scraper(scrape, request):
     except Exception:
         pass
-    if source_url := scrape.url:
+    try:
+        source_url = scrape.canonical_url()
+    except Exception:
+        try:
+            source_url = scrape.url
+        except Exception:
+            pass
+    if source_url:
         recipe_json['source_url'] = source_url
         try:
             keywords.append(source_url.replace('http://', '').replace('https://', '').split('/')[0])
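
canonical_url() comes from the scraper's schema data and can raise on pages without it, hence the nested fallbacks before the plain truthiness check. The same preference order as a standalone helper (pick_source_url is a hypothetical name):

    def pick_source_url(scrape):
        # Prefer the declared canonical URL, fall back to the fetch URL,
        # and give up with None so callers can test truthiness safely.
        try:
            return scrape.canonical_url()
        except Exception:
            try:
                return scrape.url
            except Exception:
                return None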
@@ -129,9 +136,11 @@ def get_from_scraper(scrape, request):
     ingredient_parser = IngredientParser(request, True)
 
     recipe_json['steps'] = []
-    for i in parse_instructions(scrape.instructions()):
-        recipe_json['steps'].append({'instruction': i, 'ingredients': [], })
+    try:
+        for i in parse_instructions(scrape.instructions()):
+            recipe_json['steps'].append({'instruction': i, 'ingredients': [], })
+    except Exception:
+        pass
 
     if len(recipe_json['steps']) == 0:
         recipe_json['steps'].append({'instruction': '', 'ingredients': [], })
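
Wrapping the loop means a scraper whose instructions() call fails no longer aborts the whole import; the length check after it then guarantees at least one (empty) step. A standalone illustration with a stubbed-out scraper call (parse_instructions is omitted here for brevity):

    recipe_json = {'steps': []}

    def instructions():
        raise NotImplementedError  # stand-in for a scraper missing instructions

    try:
        for i in instructions():
            recipe_json['steps'].append({'instruction': i, 'ingredients': []})
    except Exception:
        pass

    if len(recipe_json['steps']) == 0:
        recipe_json['steps'].append({'instruction': '', 'ingredients': []})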

View File

@@ -1,6 +1,6 @@
 from bs4 import BeautifulSoup
 from json import JSONDecodeError
-from recipe_scrapers import SCRAPERS, get_host_name
+from recipe_scrapers import SCRAPERS
 from recipe_scrapers._factory import SchemaScraperFactory
 from recipe_scrapers._schemaorg import SchemaOrg
@@ -15,13 +15,7 @@ SCRAPERS.update(CUSTOM_SCRAPERS)
 def text_scraper(text, url=None):
-    domain = None
-    if url:
-        domain = get_host_name(url)
-    if domain in SCRAPERS:
-        scraper_class = SCRAPERS[domain]
-    else:
-        scraper_class = SchemaScraperFactory.SchemaScraper
+    scraper_class = SchemaScraperFactory.SchemaScraper
 
     class TextScraper(scraper_class):
         def __init__(
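
With the domain lookup gone, text_scraper always builds TextScraper on the generic schema.org scraper rather than a site-specific class, so raw text with embedded ld+json is parsed the same way regardless of origin. A reduced, hypothetical sketch of that subclassing pattern (MinimalTextScraper and its body are illustrative, not the real TextScraper):

    from bs4 import BeautifulSoup
    from recipe_scrapers._factory import SchemaScraperFactory

    class MinimalTextScraper(SchemaScraperFactory.SchemaScraper):
        # Skip the parent's HTTP fetch and parse supplied markup directly;
        # the attribute names below are assumptions for illustration.
        def __init__(self, page_data, url=None):
            self.soup = BeautifulSoup(page_data, "html.parser")
            self.url = url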

View File

@@ -27,7 +27,7 @@ class Paprika(Integration):
         recipe.description = '' if len(recipe_json['description'].strip()) > 500 else recipe_json['description'].strip()
 
         try:
-            if 'servings' in recipe_json['servings']:
+            if 'servings' in recipe_json:
                 recipe.servings = parse_servings(recipe_json['servings'])
                 recipe.servings_text = parse_servings_text(recipe_json['servings'])
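
The changed condition is a genuine bug fix, not a style tweak: the old form tested for the substring 'servings' inside the value recipe_json['servings'], which raises KeyError when the key is absent and is False for plain values like '4'. A standalone illustration:

    recipe_json = {'servings': '4'}

    assert 'servings' in recipe_json                  # membership test on the keys
    assert 'servings' not in recipe_json['servings']  # substring test on the value
    # With the key absent, the old form raises KeyError instead.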

View File

@@ -78,7 +78,11 @@ class Plantoeat(Integration):
         current_recipe = ''
 
         for fl in file.readlines():
-            line = fl.decode("windows-1250")
+            try:
+                line = fl.decode("utf-8")
+            except UnicodeDecodeError:
+                line = fl.decode("windows-1250")
 
             if line.startswith('--------------'):
                 if current_recipe != '':
                     recipe_list.append(current_recipe)
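
Decoding every line as windows-1250 silently mangles UTF-8 exports, so the fix tries UTF-8 first and only falls back for legacy files. A self-contained demonstration with made-up sample bytes:

    raw = 'Gulasz z papryką'.encode('windows-1250')  # legacy-encoded export line

    try:
        line = raw.decode('utf-8')        # 0xb9 is invalid UTF-8, so this raises
    except UnicodeDecodeError:
        line = raw.decode('windows-1250')

    print(line)  # -> Gulasz z papryką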

Binary file not shown.

Binary file not shown.