From dabcea6ba78c157e560714c65e681aa2db615e26 Mon Sep 17 00:00:00 2001
From: tomtjes <7606307+tomtjes@users.noreply.github.com>
Date: Wed, 13 Jul 2022 14:37:16 -0400
Subject: [PATCH] Update copymethat.py

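- store the original recipe link in the recipe's source_url field
  instead of appending it to the instructions
- preserve the "I made this" flag as a keyword
- preserve long descriptions (over 512 characters) in full at the
  bottom of the last step
- preserve ingredient and step subheaders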
---
 cookbook/integration/copymethat.py | 94 ++++++++++++++++++++----------
 1 file changed, 63 insertions(+), 31 deletions(-)

diff --git a/cookbook/integration/copymethat.py b/cookbook/integration/copymethat.py
index 2a9c56521..a581ab73d 100644
--- a/cookbook/integration/copymethat.py
+++ b/cookbook/integration/copymethat.py
@@ -2,7 +2,7 @@ import re
 from io import BytesIO
 from zipfile import ZipFile
 
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag
 from django.utils.translation import gettext as _
 
 from cookbook.helper.ingredient_parser import IngredientParser
@@ -21,18 +21,21 @@ class CopyMeThat(Integration):
 
     def get_recipe_from_file(self, file):
         # 'file' comes is as a beautifulsoup object
-        recipe = Recipe.objects.create(name=file.find("div", {"id": "name"}).text.strip(), created_by=self.request.user, internal=True, space=self.request.space, )
+        try:
+            source = file.find("a", {"id": "original_link"}).text
+        except AttributeError:
+            source = ''
+
+        recipe = Recipe.objects.create(name=file.find("div", {"id": "name"}).text.strip()[:128], source_url=source, created_by=self.request.user, internal=True, space=self.request.space, )
 
         for category in file.find_all("span", {"class": "recipeCategory"}):
             keyword, created = Keyword.objects.get_or_create(name=category.text, space=self.request.space)
             recipe.keywords.add(keyword)
-
+        
         try:
             recipe.servings = parse_servings(file.find("a", {"id": "recipeYield"}).text.strip())
             recipe.working_time = iso_duration_to_minutes(file.find("span", {"meta": "prepTime"}).text.strip())
             recipe.waiting_time = iso_duration_to_minutes(file.find("span", {"meta": "cookTime"}).text.strip())
-            recipe.description = (file.find("div ", {"id": "description"}).text.strip())[:512]
-
         except AttributeError:
             pass
 
@@ -42,36 +45,65 @@ class CopyMeThat(Integration):
         except AttributeError:
             pass
 
-        step = Step.objects.create(instruction='', space=self.request.space, )
-
-        ingredient_parser = IngredientParser(self.request, True)
-        for ingredient in file.find_all("li", {"class": "recipeIngredient"}):
-            if ingredient.text == "":
-                continue
-            amount, unit, food, note = ingredient_parser.parse(ingredient.text.strip())
-            f = ingredient_parser.get_food(food)
-            u = ingredient_parser.get_unit(unit)
-            step.ingredients.add(Ingredient.objects.create(
-                food=f, unit=u, amount=amount, note=note, original_text=ingredient.text.strip(), space=self.request.space,
-            ))
-
-        for s in file.find_all("li", {"class": "instruction"}):
-            if s.text == "":
-                continue
-            step.instruction += s.text.strip() + ' \n\n'
-
-        for s in file.find_all("li", {"class": "recipeNote"}):
-            if s.text == "":
-                continue
-            step.instruction += s.text.strip() + ' \n\n'
-
         try:
-            if file.find("a", {"id": "original_link"}).text != '':
-                step.instruction += "\n\n" + _("Imported from") + ": " + file.find("a", {"id": "original_link"}).text
-                step.save()
+            if len(file.find("span", {"id": "made_this"}).text.strip()) > 0:
+                recipe.keywords.add(Keyword.objects.get_or_create(space=self.request.space, name=_('I made this'))[0])
         except AttributeError:
             pass
 
+        step = Step.objects.create(instruction='', space=self.request.space, )
+
+        ingredient_parser = IngredientParser(self.request, True)
+
+        ingredients = file.find("ul", {"id": "recipeIngredients"})
+        if isinstance(ingredients, Tag):
+            for ingredient in ingredients.children:
+                if not isinstance(ingredient, Tag) or ingredient.text == "":
+                    continue
+                if any(x in ingredient['class'] for x in ["recipeIngredient_subheader", "recipeIngredient_note"]):
+                    step.ingredients.add(Ingredient.objects.create(is_header=True, note=ingredient.text.strip(), original_text=ingredient.text.strip(), space=self.request.space, ))
+                else:
+                    amount, unit, food, note = ingredient_parser.parse(ingredient.text.strip())
+                    f = ingredient_parser.get_food(food)
+                    u = ingredient_parser.get_unit(unit)
+                    step.ingredients.add(Ingredient.objects.create(food=f, unit=u, amount=amount, note=note, original_text=ingredient.text.strip(), space=self.request.space, ))
+
+        instructions = file.find("ol", {"id": "recipeInstructions"})
+        if isinstance(instructions, Tag):
+            for instruction in instructions.children:
+                if not isinstance(instruction, Tag) or instruction.text == "":
+                    continue
+                if "instruction_subheader" in instruction['class']:
+                    if step.instruction:
+                        step.save()
+                        recipe.steps.add(step)
+                        step = Step.objects.create(instruction='', space=self.request.space, )
+                    
+                    step.name = instruction.text.strip()[:128]
+                else:
+                    step.instruction += instruction.text.strip() + ' \n\n'
+
+        notes = file.find_all("li", {"class": "recipeNote"})
+        if notes:
+            step.instruction += '*Notes:* \n\n'
+
+            for n in notes:
+                if n.text == "":
+                    continue
+                step.instruction += '*' + n.text.strip() + '* \n\n'
+
+        description = ''
+        try:
+            description = file.find("div", {"id": "description"}).text.strip()
+        except AttributeError:
+            pass
+        if len(description) <= 512:
+            recipe.description = description
+        else:
+            recipe.description = description[:480] + ' ... (full description below)'
+            step.instruction += '*Description:* \n\n*' + description + '* \n\n'
+
+        step.save()
         recipe.steps.add(step)
 
         # import the Primary recipe image that is stored in the Zip