Fix after rebase

2026-01-10 16:47:59 -05:00 · 2021-05-01 16:30:33 -05:00
parent d7afbc5745
commit 59c33798b8
8 changed files with 278 additions and 35 deletions
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@@ -6,8 +6,9 @@ from isodate.isoerror import ISO8601Error
 from cookbook.helper.ingredient_parser import parse as parse_single_ingredient
 from cookbook.models import Keyword
 from django.utils.dateparse import parse_duration
+from html import unescape
 from recipe_scrapers._schemaorg import SchemaOrgException
-from recipe_scrapers._utils import get_minutes, normalize_string
+from recipe_scrapers._utils import get_minutes


 def get_from_scraper(scrape, space):
@@ -21,7 +22,6 @@ def get_from_scraper(scrape, space):

    try:
        description = scrape.schema.data.get("description") or ''
-
    except AttributeError:
        description = ''

@@ -186,10 +186,6 @@ def parse_ingredients(ingredients):


 def parse_description(description):
-    description = re.sub(r'\n\s*\n', '\n\n', description)
-    description = re.sub(' +', ' ', description)
-    description = re.sub('</p>', '\n', description)
-    description = re.sub('<[^<]+?>', '', description)
    return normalize_string(description)


@@ -214,10 +210,6 @@ def parse_instructions(instructions):
                    instruction_text += str(i)
        instructions = instruction_text

-    instructions = re.sub(r'\n\s*\n', '\n\n', instructions)
-    instructions = re.sub(' +', ' ', instructions)
-    instructions = re.sub('</p>', '\n', instructions)
-    instructions = re.sub('<[^<]+?>', '', instructions)
    return normalize_string(instructions)


--- a/cookbook/helper/scrapers/cooksillustrated.py
+++ b/cookbook/helper/scrapers/cooksillustrated.py
@@ -46,8 +46,13 @@ class CooksIllustrated(AbstractScraper):
    def instructions(self):
        if not self.recipe:
            self.get_recipe()
+        if self.recipe.get('headnote', False):
+            i = ['Note: ' + self.recipe.get('headnote', '')]
+        else:
+            i = []
        return "\n".join(
-            [self.recipe['whyThisWorks']]
+            i
+            + [self.recipe.get('whyThisWorks', '')]
            + [
                instruction['fields']['content']
                for instruction in self.recipe['instructions']
--- a/cookbook/helper/scrapers/scrapers.py
+++ b/cookbook/helper/scrapers/scrapers.py
@@ -1,6 +1,6 @@
 from bs4 import BeautifulSoup
 from json import JSONDecodeError
-from recipe_scrapers import SCRAPERS, get_domain, _exception_handling
+from recipe_scrapers import SCRAPERS, get_host_name
 from recipe_scrapers._factory import SchemaScraperFactory
 from recipe_scrapers._schemaorg import SchemaOrg

@@ -30,7 +30,7 @@ def text_scraper(text, url=None):
                url=None
        ):
            self.wild_mode = False
-            self.exception_handling = _exception_handling
+            self.exception_handling = None  # TODO add new method here, old one was deprecated
            self.meta_http_equiv = False
            self.soup = BeautifulSoup(page_data, "html.parser")
            self.url = url