From e345d2eb3914bd5b3132dfe50816023f149cdac9 Mon Sep 17 00:00:00 2001
From: smilerz <smilerz@gmail.com>
Date: Sun, 2 May 2021 15:44:27 -0500
Subject: [PATCH] updated to handle new behavior of recipe_scrapers

---
 cookbook/helper/recipe_url_import.py | 69 ++++++++++++++++++++--------
 cookbook/helper/scrapers/scrapers.py |  2 +-
 cookbook/tests/other/_recipes.py     |  2 +-
 3 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py
index 508137750..55aa38bb6 100644
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@@ -17,53 +17,84 @@ def get_from_scraper(scrape, space):
 
     recipe_json = {}
     try:
-        recipe_json['name'] = parse_name(scrape.title() or scrape.schema.data.get('name') or '')
-    except (TypeError, AttributeError,ElementNotFoundInHtml):
-        recipe_json['name'] = ''
+        recipe_json['name'] = parse_name(scrape.title() or None)
+    except (TypeError, AttributeError, ElementNotFoundInHtml, NotImplementedError):
+        recipe_json['name'] = None
+    if not recipe_json['name']:
+        try:
+            recipe_json['name'] = scrape.schema.data.get('name') or ''
+        except Exception:
+            recipe_json['name'] = ''
 
     try:
         description = scrape.schema.data.get("description") or ''
-    except (AttributeError,ElementNotFoundInHtml):
+    except (AttributeError, ElementNotFoundInHtml, NotImplementedError, SchemaOrgException):
         description = ''
 
     recipe_json['description'] = parse_description(description)
 
     try:
-        servings = scrape.yields()
-        servings = int(re.findall(r'\b\d+\b', servings)[0])
-    except (AttributeError,ElementNotFoundInHtml, ValueError, IndexError):
-        servings = 1
+        servings = scrape.yields() or None
+    except Exception:
+        servings = None
+    if not servings:
+        try:
+            servings = scrape.schema.data.get('recipeYield') or 1
+        except Exception:
+            servings = 1
+    if type(servings) != int:
+        try:
+            servings = int(re.findall(r'\b\d+\b', servings)[0])
+        except Exception:
+            servings = 1
     recipe_json['servings'] = servings
 
     try:
         recipe_json['prepTime'] = get_minutes(scrape.schema.data.get("prepTime")) or 0
-    except (AttributeError, ElementNotFoundInHtml):
+    except Exception:
         recipe_json['prepTime'] = 0
     try:
         recipe_json['cookTime'] = get_minutes(scrape.schema.data.get("cookTime")) or 0
-    except (AttributeError, ElementNotFoundInHtml):
+    except Exception:
         recipe_json['cookTime'] = 0
     if recipe_json['cookTime'] + recipe_json['prepTime'] == 0:
         try:
             recipe_json['prepTime'] = get_minutes(scrape.total_time()) or 0
-        except (AttributeError,ElementNotFoundInHtml):
-            pass
+        except Exception:  # total_time() failed; fall back to the schema.org totalTime field
+            try:
+                recipe_json['prepTime'] = get_minutes(scrape.schema.data.get("totalTime")) or 0  # store the fallback instead of discarding it
+            except Exception:
+                pass  # best effort: prepTime keeps the 0 assigned earlier
 
     try:
-        recipe_json['image'] = parse_image(scrape.image()) or ''
-    except (AttributeError,ElementNotFoundInHtml, TypeError, SchemaOrgException):
-        recipe_json['image'] = ''
+        recipe_json['image'] = parse_image(scrape.image()) or None
+    except Exception:
+        recipe_json['image'] = None
+    if not recipe_json['image']:
+        try:
+            recipe_json['image'] = parse_image(scrape.schema.data.get('image')) or ''
+        except Exception:
+            recipe_json['image'] = ''
 
     keywords = []
     try:
         if scrape.schema.data.get("keywords"):
             keywords += listify_keywords(scrape.schema.data.get("keywords"))
+    except Exception:
+        pass
+    try:
         if scrape.schema.data.get('recipeCategory'):
             keywords += listify_keywords(scrape.schema.data.get("recipeCategory"))
+    except Exception:
+        pass
+    try:
         if scrape.schema.data.get('recipeCuisine'):
             keywords += listify_keywords(scrape.schema.data.get("recipeCuisine"))
+    except Exception:
+        pass
+    try:
         recipe_json['keywords'] = parse_keywords(list(set(map(str.casefold, keywords))), space)
-    except (AttributeError,ElementNotFoundInHtml):
+    except AttributeError:
         recipe_json['keywords'] = keywords
 
     try:
@@ -104,12 +135,12 @@ def get_from_scraper(scrape, space):
                     }
                 )
         recipe_json['recipeIngredient'] = ingredients
-    except (AttributeError,ElementNotFoundInHtml):
+    except Exception:
         recipe_json['recipeIngredient'] = ingredients
 
     try:
         recipe_json['recipeInstructions'] = parse_instructions(scrape.instructions())
-    except (AttributeError,ElementNotFoundInHtml):
+    except Exception:
         recipe_json['recipeInstructions'] = ""
 
     if scrape.url:
@@ -222,6 +253,8 @@ def parse_instructions(instructions):
 
 def parse_image(image):
     # check if list of images is returned, take first if so
+    if not image:
+        return None
     if type(image) == list:
         for pic in image:
             if (type(pic) == str) and (pic[:4] == 'http'):
diff --git a/cookbook/helper/scrapers/scrapers.py b/cookbook/helper/scrapers/scrapers.py
index 98807d186..8b0ae78b8 100644
--- a/cookbook/helper/scrapers/scrapers.py
+++ b/cookbook/helper/scrapers/scrapers.py
@@ -30,7 +30,7 @@ def text_scraper(text, url=None):
                 url=None
         ):
             self.wild_mode = False
-            self.exception_handling = None  # TODO add new method here, old one was deprecated
+            # self.exception_handling = None  # TODO add new method here, old one was deprecated
             self.meta_http_equiv = False
             self.soup = BeautifulSoup(page_data, "html.parser")
             self.url = url
diff --git a/cookbook/tests/other/_recipes.py b/cookbook/tests/other/_recipes.py
index 95b9b18f5..adcf3bfa6 100644
--- a/cookbook/tests/other/_recipes.py
+++ b/cookbook/tests/other/_recipes.py
@@ -863,7 +863,7 @@ DELISH = {
     "servings": 6,
     "prepTime": 10,
     "cookTime": 0,
-    "image": '',
+    "image": 'https://hips.hearstapps.com/hmg-prod.s3.amazonaws.com/images/delish-cheesy-asparagus-horizontal-7-1536094595.png',
     "keywords": [
         {
             "id": 2211187,