Fix after rebase

This commit is contained in:
smilerz
2021-05-01 16:30:33 -05:00
parent d7afbc5745
commit 59c33798b8
8 changed files with 278 additions and 35 deletions

View File

@@ -6,8 +6,9 @@ from isodate.isoerror import ISO8601Error
from cookbook.helper.ingredient_parser import parse as parse_single_ingredient
from cookbook.models import Keyword
from django.utils.dateparse import parse_duration
from html import unescape
from recipe_scrapers._schemaorg import SchemaOrgException
from recipe_scrapers._utils import get_minutes, normalize_string
from recipe_scrapers._utils import get_minutes
def get_from_scraper(scrape, space):
@@ -21,7 +22,6 @@ def get_from_scraper(scrape, space):
try:
description = scrape.schema.data.get("description") or ''
except AttributeError:
description = ''
@@ -186,10 +186,6 @@ def parse_ingredients(ingredients):
def parse_description(description):
description = re.sub(r'\n\s*\n', '\n\n', description)
description = re.sub(' +', ' ', description)
description = re.sub('</p>', '\n', description)
description = re.sub('<[^<]+?>', '', description)
return normalize_string(description)
@@ -214,10 +210,6 @@ def parse_instructions(instructions):
instruction_text += str(i)
instructions = instruction_text
instructions = re.sub(r'\n\s*\n', '\n\n', instructions)
instructions = re.sub(' +', ' ', instructions)
instructions = re.sub('</p>', '\n', instructions)
instructions = re.sub('<[^<]+?>', '', instructions)
return normalize_string(instructions)

View File

@@ -46,8 +46,13 @@ class CooksIllustrated(AbstractScraper):
def instructions(self):
if not self.recipe:
self.get_recipe()
if self.recipe.get('headnote', False):
i = ['Note: ' + self.recipe.get('headnote', '')]
else:
i = []
return "\n".join(
[self.recipe['whyThisWorks']]
i
+ [self.recipe.get('whyThisWorks', '')]
+ [
instruction['fields']['content']
for instruction in self.recipe['instructions']

View File

@@ -1,6 +1,6 @@
from bs4 import BeautifulSoup
from json import JSONDecodeError
from recipe_scrapers import SCRAPERS, get_domain, _exception_handling
from recipe_scrapers import SCRAPERS, get_host_name
from recipe_scrapers._factory import SchemaScraperFactory
from recipe_scrapers._schemaorg import SchemaOrg
@@ -30,7 +30,7 @@ def text_scraper(text, url=None):
url=None
):
self.wild_mode = False
self.exception_handling = _exception_handling
self.exception_handling = None # TODO add new method here, old one was deprecated
self.meta_http_equiv = False
self.soup = BeautifulSoup(page_data, "html.parser")
self.url = url