#1552 Import Recipes from Cookidoo

This commit is contained in:
Marcus Wolschon
2023-01-13 21:19:49 +01:00
parent c2a8214290
commit be24e25ae4
3 changed files with 74 additions and 2 deletions

View File

@@ -0,0 +1,67 @@
from recipe_scrapers._abstract import AbstractScraper
from gettext import gettext as _
class Cookidoo(AbstractScraper):
    """Scraper for Cookidoo recipes.

    Builds the instruction text from the schema.org ``recipeInstructions``
    data, numbering the steps and rewriting Thermomix-specific markup that
    shows up in nearly every recipe on Cookidoo.
    """

    def normalize_instruction(self, instruction):
        """Rewrite Thermomix-specific markup inside one instruction string.

        Args:
            instruction: Raw instruction text, or ``None``.

        Returns:
            The text with every known pattern substituted in table order,
            or ``""`` when ``instruction`` is ``None``.
        """
        if instruction is None:
            return ""

        # Built per call so the _() lookups happen at call time, exactly as
        # they did in the chained .replace() version.
        #
        # NOTE(review): the search strings of the Linkslauf / Kochlöffel /
        # Kneten entries are empty in the reviewed source. They presumably
        # were icon-font glyphs that got lost; restore them as explicit
        # "\uXXXX" escapes once the code points are confirmed. The needles
        # ending in a space may be affected in the same way - TODO confirm.
        replacements = (
            ("<nobr>", "**"),
            ("</nobr>", "**"),
            ("", _('Linkslauf')),
            ("", _('Kochlöffel')),
            ("", _('Kneten')),
            ("Andicken ", _('Andicken')),
            ("Erwärmen ", _('Erwärmen')),
            ("Fermentieren ", _('Fermentieren')),
            ("Rühraufsatz einsetzen", "**Rühraufsatz einsetzen**"),
            ("Rühraufsatz entfernen", "**Rühraufsatz entfernen**"),
        )

        for needle, replacement in replacements:
            # str.replace("", x) would insert x between every character of
            # the instruction, so entries without a search string are skipped.
            if needle:
                instruction = instruction.replace(needle, replacement)
        return instruction

    def instructions(self):
        """Return the recipe instructions as one string.

        When ``recipeInstructions`` is a list, every entry is expanded into
        numbered step texts (numbering starts at 1 and follows the entry's
        position in the list), each text is normalized and all of them are
        concatenated. Any other value is returned unchanged; a missing or
        falsy value becomes ``""``.
        """
        instructions = self.schema.data.get("recipeInstructions") or ""
        if not isinstance(instructions, list):
            return instructions

        parts = []
        for number, schema_instruction_item in enumerate(instructions, start=1):
            parts += self.extract_instructions_text(schema_instruction_item, "#", number)

        # join all steps into a recipe
        return "".join(self.normalize_instruction(part) for part in parts)

    def extract_instructions_text(self, schema_item, prefix, start_step_number):
        """Expand one ``recipeInstructions`` entry into formatted text parts.

        Args:
            schema_item: A plain string, a ``HowToStep`` dict or a
                ``HowToSection`` dict.
            prefix: String of ``#`` characters placed in front of the step
                label; one more ``#`` is prepended for every section level.
            start_step_number: Number used in the label of this step.

        Returns:
            A list of strings. It is empty for entries of an unsupported
            type, for unknown ``@type`` values and for steps that carry
            neither a usable name nor a text.
        """
        step_format = "\n\n" + prefix + _("Step {}") + "\n\n{}"
        section_format = "\n\n{}\n\n"

        if isinstance(schema_item, str):
            return [step_format.format(start_step_number, schema_item)]
        if not isinstance(schema_item, dict):
            # e.g. None inside the list: nothing to render, but do not crash
            return []

        item_type = schema_item.get("@type")
        instructions_gist = []

        if item_type == "HowToStep":
            name = schema_item.get("name")
            # a step may come without "text"; treat that as empty instead of
            # failing on None.startswith() or printing the word "None"
            text = schema_item.get("text") or ""
            # some sites have duplicated name and text properties (1:1)
            # others have name same as text but truncated to X chars.
            # add the name only if it is different from the text
            if name and not text.startswith(name.rstrip(".")):
                instructions_gist.append(step_format.format(start_step_number, name))
            if text:
                instructions_gist.append(step_format.format(start_step_number, text))
        elif item_type == "HowToSection":
            section_name = schema_item.get("name") or schema_item.get("Name") or _("Instructions")
            instructions_gist.append(section_format.format(section_name))
            # steps are renumbered from 1 inside every section; a section
            # without "itemListElement" contributes only its title
            for number, item in enumerate(schema_item.get("itemListElement") or [], start=1):
                instructions_gist += self.extract_instructions_text(item, "#" + prefix, number)

        return instructions_gist

    def ingredients(self):
        """Return the ingredients exactly as the schema.org data provides them."""
        return self.schema.ingredients()

View File

@@ -6,11 +6,15 @@ from recipe_scrapers._factory import SchemaScraperFactory
from recipe_scrapers._schemaorg import SchemaOrg
from .cooksillustrated import CooksIllustrated
from .cookidoo import Cookidoo
# Host name -> scraper class for sites handled by the project's own scrapers.
# The CooksIllustrated hosts come first, then the Cookidoo hosts, so the
# insertion order matches the previous literal.
CUSTOM_SCRAPERS = {
    **{
        CooksIllustrated.host(site=site): CooksIllustrated
        for site in ("cooksillustrated", "americastestkitchen", "cookscountry")
    },
    **{
        host: Cookidoo
        for host in ("cookidoo.de", "cookidoo.at", "cookidoo.ch")
    },
}
# Merge the custom entries into the SCRAPERS registry.
SCRAPERS.update(CUSTOM_SCRAPERS)

View File

@@ -40,8 +40,9 @@ django-storages==1.13.2
boto3==1.26.41
django-prometheus==2.2.0
django-hCaptcha==0.2.0
python-ldap==3.4.3
django-auth-ldap==4.1.0
#python-ldap==3.4.3
django-python3-ldap
#django-auth-ldap==4.1.0
pytest-factoryboy==2.5.0
pyppeteer==1.0.2
validators==0.20.0