mirror of
https://github.com/TandoorRecipes/recipes.git
synced 2026-01-11 09:07:12 -05:00
#1552 Import Recipes from Cookidoo
This commit is contained in:
67
cookbook/helper/scrapers/cookidoo.py
Normal file
67
cookbook/helper/scrapers/cookidoo.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
from recipe_scrapers._abstract import AbstractScraper
|
||||||
|
from gettext import gettext as _
|
||||||
|
|
||||||
|
|
||||||
|
class Cookidoo(AbstractScraper):
|
||||||
|
|
||||||
|
def normalize_instruction(self, instruction):
    """Turn Cookidoo/Thermomix markup of one instruction into Markdown text.

    ``<nobr>`` tags become bold markers, a handful of Thermomix mode names
    are swapped for their translated labels and the two "Rühraufsatz"
    hints are set in bold. The replacements are applied in the order
    listed below.

    :param instruction: raw instruction text, or ``None``
    :return: the normalized text, ``""`` when ``instruction`` is ``None``
    """
    if instruction is None:
        return ""

    # handle Thermomix-specific instructions that happen in nearly every recipe on Cookidoo
    replacements = (
        ("<nobr>", "**"),
        ("</nobr>", "**"),
        # NOTE(review): the search strings of the next three entries are
        # empty in this copy of the file - presumably Thermomix icon glyphs
        # that did not survive. TODO: restore the glyphs; until then the
        # entries are skipped by the guard in the loop below.
        ("", _('Linkslauf')),
        ("", _('Kochlöffel')),
        ("", _('Kneten')),
        # NOTE(review): these three may have lost a trailing glyph as well
        # (only the space after the word is removed) - confirm.
        ("Andicken ", _('Andicken')),
        ("Erwärmen ", _('Erwärmen')),
        ("Fermentieren ", _('Fermentieren')),
        ("Rühraufsatz einsetzen", "**Rühraufsatz einsetzen**"),
        ("Rühraufsatz entfernen", "**Rühraufsatz entfernen**"),
    )

    normalized = instruction
    for needle, replacement in replacements:
        # str.replace("", x) inserts x before every character and at the
        # end of the string, so an empty search string must never reach it.
        if needle:
            normalized = normalized.replace(needle, replacement)
    return normalized
|
||||||
|
|
||||||
|
def instructions(self):
    """Return the recipe instructions as a single Markdown string.

    Reads ``recipeInstructions`` from the schema data. A list is flattened
    item by item via ``extract_instructions_text`` (top-level items are
    numbered from 1 and use the ``"#"`` heading prefix), every resulting
    fragment is passed through ``normalize_instruction`` and the fragments
    are concatenated. Any non-list value is returned unchanged; a missing
    or falsy value yields ``""``.
    """
    raw = self.schema.data.get("recipeInstructions") or ""

    # anything that is not a list is handed back as-is
    if not isinstance(raw, list):
        return raw

    fragments = []
    for position, entry in enumerate(raw, start=1):
        fragments.extend(self.extract_instructions_text(entry, "#", position))

    # join all steps into a recipe
    return "".join(map(self.normalize_instruction, fragments))
|
||||||
|
|
||||||
|
def extract_instructions_text(self, schema_item, prefix, start_step_number):
    """Flatten one schema.org instruction item into Markdown fragments.

    * A plain string becomes one step fragment.
    * A ``HowToStep`` contributes its ``text``; its ``name`` is emitted as
      an additional fragment (with the same step number) only when the
      text does not already start with it.
    * A ``HowToSection`` contributes its name followed by its
      ``itemListElement`` entries, processed recursively with one more
      ``#`` in front of the prefix and numbered from 1 again.
    * Anything else yields no fragments.

    :param schema_item: a string or a dict taken from ``recipeInstructions``
    :param prefix: heading marker put in front of the step title
    :param start_step_number: number used in the step title
    :return: list of text fragments, possibly empty
    """
    step_format = "\n\n" + prefix + _("Step {}") + "\n\n{}"
    section_format = "\n\n{}\n\n"

    if isinstance(schema_item, str):
        return [step_format.format(start_step_number, schema_item)]
    if not isinstance(schema_item, dict):
        # None, numbers, nested lists ...: nothing that can be rendered
        return []

    instructions_gist = []
    item_type = schema_item.get("@type")

    if item_type == "HowToStep":
        name = schema_item.get("name")
        # a step without "text" must not crash the import
        text = schema_item.get("text") or ""
        # some sites have duplicated name and text properties (1:1)
        # others have name same as text but truncated to X chars.
        # ignore name in these cases and add the name value only if it's different from the text
        if name and not text.startswith(name.rstrip(".")):
            instructions_gist.append(step_format.format(start_step_number, name))
        if text:
            instructions_gist.append(step_format.format(start_step_number, text))

    elif item_type == "HowToSection":
        section_name = schema_item.get("name") or schema_item.get("Name") or _("Instructions")
        instructions_gist.append(section_format.format(section_name))

        elements = schema_item.get("itemListElement") or []
        if isinstance(elements, (str, dict)):
            # a single entry that is not wrapped in a list
            elements = [elements]
        for step_number, item in enumerate(elements, start=1):
            instructions_gist += self.extract_instructions_text(item, "#" + prefix, step_number)

    return instructions_gist
|
||||||
|
|
||||||
|
def ingredients(self):
    """Return the ingredients exactly as the schema data reports them."""
    schema_ingredients = self.schema.ingredients()
    return schema_ingredients
|
||||||
@@ -6,11 +6,15 @@ from recipe_scrapers._factory import SchemaScraperFactory
|
|||||||
from recipe_scrapers._schemaorg import SchemaOrg
|
from recipe_scrapers._schemaorg import SchemaOrg
|
||||||
|
|
||||||
from .cooksillustrated import CooksIllustrated
|
from .cooksillustrated import CooksIllustrated
|
||||||
|
from .cookidoo import Cookidoo
|
||||||
|
|
||||||
CUSTOM_SCRAPERS = {
|
CUSTOM_SCRAPERS = {
|
||||||
CooksIllustrated.host(site="cooksillustrated"): CooksIllustrated,
|
CooksIllustrated.host(site="cooksillustrated"): CooksIllustrated,
|
||||||
CooksIllustrated.host(site="americastestkitchen"): CooksIllustrated,
|
CooksIllustrated.host(site="americastestkitchen"): CooksIllustrated,
|
||||||
CooksIllustrated.host(site="cookscountry"): CooksIllustrated,
|
CooksIllustrated.host(site="cookscountry"): CooksIllustrated,
|
||||||
|
"cookidoo.de": Cookidoo,
|
||||||
|
"cookidoo.at": Cookidoo,
|
||||||
|
"cookidoo.ch": Cookidoo,
|
||||||
}
|
}
|
||||||
SCRAPERS.update(CUSTOM_SCRAPERS)
|
SCRAPERS.update(CUSTOM_SCRAPERS)
|
||||||
|
|
||||||
|
|||||||
@@ -40,8 +40,9 @@ django-storages==1.13.2
|
|||||||
boto3==1.26.41
|
boto3==1.26.41
|
||||||
django-prometheus==2.2.0
|
django-prometheus==2.2.0
|
||||||
django-hCaptcha==0.2.0
|
django-hCaptcha==0.2.0
|
||||||
python-ldap==3.4.3
|
#python-ldap==3.4.3
|
||||||
django-auth-ldap==4.1.0
|
django-python3-ldap
|
||||||
|
#django-auth-ldap==4.1.0
|
||||||
pytest-factoryboy==2.5.0
|
pytest-factoryboy==2.5.0
|
||||||
pyppeteer==1.0.2
|
pyppeteer==1.0.2
|
||||||
validators==0.20.0
|
validators==0.20.0
|
||||||
|
|||||||
Reference in New Issue
Block a user