from recipe_scrapers._abstract import AbstractScraper
from gettext import gettext as _


class Cookidoo(AbstractScraper):
    """Scraper for Cookidoo recipe pages, driven by the page's schema.org data.

    Instructions are flattened into a single text with numbered step headings,
    and a handful of Thermomix-specific terms are rewritten along the way.
    """

    def normalize_instruction(self, instruction):
        """Rewrite Thermomix-specific fragments of one instruction text.

        Args:
            instruction: Instruction text, or ``None``.

        Returns:
            The rewritten text, or ``""`` when ``instruction`` is ``None``.
        """
        if instruction is None:
            return ""

        # Handle Thermomix-specific instructions that appear in nearly every
        # recipe on Cookidoo. Pairs are applied in order.
        # NOTE(review): the first five search strings are empty in this
        # revision -- presumably markup tags / icon glyphs that were lost
        # somewhere along the way. Restore the real search strings; until
        # then they are skipped (see the loop below).
        replacements = (
            ("", "**"),
            ("", "**"),
            ("", _('Linkslauf')),
            ("", _('Kochlöffel')),
            ("", _('Kneten')),
            ("Andicken ", _('Andicken')),
            ("Erwärmen ", _('Erwärmen')),
            ("Fermentieren ", _('Fermentieren')),
            ("Rühraufsatz einsetzen", "**Rühraufsatz einsetzen**"),
            ("Rühraufsatz entfernen", "**Rühraufsatz entfernen**"),
        )
        for needle, substitute in replacements:
            if not needle:
                # str.replace("", x) inserts x between every character, which
                # would garble the whole instruction.
                continue
            instruction = instruction.replace(needle, substitute)
        return instruction

    def instructions(self):
        """Return the recipe instructions as one text.

        Reads ``recipeInstructions`` from ``self.schema.data``. A list is
        flattened item by item through ``extract_instructions_text`` (top-level
        items are numbered from 1), each fragment is passed through
        ``normalize_instruction`` and the results are concatenated. Any other
        value is returned unchanged; a missing or falsy value yields ``""``.
        """
        instructions = self.schema.data.get("recipeInstructions") or ""

        if not isinstance(instructions, list):
            return instructions

        instructions_gist = []
        for step_number, schema_instruction_item in enumerate(instructions, start=1):
            instructions_gist.extend(
                self.extract_instructions_text(schema_instruction_item, "#", step_number)
            )

        # join all steps into a recipe
        return "".join(
            self.normalize_instruction(instruction)
            for instruction in instructions_gist
        )

    def extract_instructions_text(self, schema_item, prefix, start_step_number):
        """Turn one ``recipeInstructions`` item into formatted text fragments.

        Args:
            schema_item: A plain string, or a dict whose ``@type`` is
                ``"HowToStep"`` or ``"HowToSection"``. Anything else produces
                no output.
            prefix: Heading marker placed in front of the step title; one more
                ``"#"`` is prepended for every level of section nesting.
            start_step_number: Number used in the step title of this item.
                Steps inside a section are renumbered from 1.

        Returns:
            A list of text fragments, in document order.
        """
        step_format = "\n\n" + prefix + _("Step {}") + "\n\n{}"
        section_format = "\n\n{}\n\n"
        instructions_gist = []

        if isinstance(schema_item, str):
            instructions_gist.append(step_format.format(start_step_number, schema_item))
            return instructions_gist

        if not isinstance(schema_item, dict):
            # Nothing usable (e.g. None or a number); skip instead of failing.
            return instructions_gist

        item_type = schema_item.get("@type")
        if item_type == "HowToStep":
            name = schema_item.get("name") or ""
            text = schema_item.get("text") or ""
            # Some sites have duplicated name and text properties (1:1), others
            # have name same as text but truncated to X chars. Ignore name in
            # these cases and add it only if it differs from the text.
            if name and not text.startswith(name.rstrip(".")):
                instructions_gist.append(step_format.format(start_step_number, name))
            if text:
                instructions_gist.append(step_format.format(start_step_number, text))
        elif item_type == "HowToSection":
            section_name = schema_item.get("name") or schema_item.get("Name") or _("Instructions")
            instructions_gist.append(section_format.format(section_name))
            # A section without "itemListElement" contributes only its title.
            section_items = schema_item.get("itemListElement") or []
            for step_number, item in enumerate(section_items, start=1):
                instructions_gist.extend(
                    self.extract_instructions_text(item, "#" + prefix, step_number)
                )
        return instructions_gist

    def ingredients(self):
        """Return the ingredients exactly as provided by the schema.org data."""
        return self.schema.ingredients()


# NOTE(review): the collapsed patch text these lines were recovered from also
# carried two hunks that do not belong to this module:
#   * cookbook/helper/scrapers/scrapers.py -- adds `from .cookidoo import Cookidoo`
#     and maps "cookidoo.de", "cookidoo.at" and "cookidoo.ch" to Cookidoo in
#     CUSTOM_SCRAPERS.
#   * requirements.txt -- comments out python-ldap==3.4.3 and
#     django-auth-ldap==4.1.0 and adds an unpinned django-python3-ldap; this
#     looks unrelated to the scraper and should be confirmed before merging.
validators==0.20.0