mirror of
https://github.com/TandoorRecipes/recipes.git
synced 2025-12-24 02:39:20 -05:00
import of gourmet files
* in gourmet export as html and zip folder * 'menus' are not imported Note: it appears that the native format '.grmt' does not include the unit for ingredients
This commit is contained in:
211
cookbook/integration/gourmet.py
Normal file
211
cookbook/integration/gourmet.py
Normal file
@@ -0,0 +1,211 @@
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from lxml import etree
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
||||
from cookbook.helper.HelperFunctions import validate_import_url
|
||||
from cookbook.helper.ingredient_parser import IngredientParser
|
||||
from cookbook.helper.recipe_url_import import parse_servings, parse_servings_text, parse_time, iso_duration_to_minutes
|
||||
from cookbook.integration.integration import Integration
|
||||
from cookbook.models import Ingredient, Recipe, Step, Keyword
|
||||
from recipe_scrapers import scrape_html
|
||||
|
||||
|
||||
class Gourmet(Integration):
|
||||
|
||||
def split_recipe_file(self, file):
|
||||
encoding = 'utf-8'
|
||||
byte_string = file.read()
|
||||
text_obj = byte_string.decode(encoding, errors="ignore")
|
||||
soup = BeautifulSoup(text_obj, "html.parser")
|
||||
return soup.find_all("div", {"class": "recipe"})
|
||||
|
||||
def get_ingredients_recursive(self, step, ingredients, ingredient_parser):
|
||||
if isinstance(ingredients, Tag):
|
||||
for ingredient in ingredients.children:
|
||||
if not isinstance(ingredient, Tag):
|
||||
continue
|
||||
|
||||
if ingredient.name in ["li"]:
|
||||
step_name = "".join(ingredient.findAll(text=True, recursive=False)).strip().rstrip(":")
|
||||
|
||||
step.ingredients.add(Ingredient.objects.create(
|
||||
is_header=True,
|
||||
note=step_name[:256],
|
||||
original_text=step_name,
|
||||
space=self.request.space,
|
||||
))
|
||||
next_ingrediets = ingredient.find("ul", {"class": "ing"})
|
||||
self.get_ingredients_recursive(step, next_ingrediets, ingredient_parser)
|
||||
|
||||
else:
|
||||
try:
|
||||
amount, unit, food, note = ingredient_parser.parse(ingredient.text.strip())
|
||||
f = ingredient_parser.get_food(food)
|
||||
u = ingredient_parser.get_unit(unit)
|
||||
step.ingredients.add(
|
||||
Ingredient.objects.create(
|
||||
food=f,
|
||||
unit=u,
|
||||
amount=amount,
|
||||
note=note,
|
||||
original_text=ingredient.text.strip(),
|
||||
space=self.request.space,
|
||||
)
|
||||
)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
def get_recipe_from_file(self, file):
|
||||
# 'file' comes is as a beautifulsoup object
|
||||
|
||||
source_url = None
|
||||
for item in file.find_all('a'):
|
||||
if item.has_attr('href'):
|
||||
source_url = item.get("href")
|
||||
break
|
||||
|
||||
name = file.find("p", {"class": "title"}).find("span", {"itemprop": "name"}).text.strip()
|
||||
|
||||
recipe = Recipe.objects.create(
|
||||
name=name[:128],
|
||||
source_url=source_url,
|
||||
created_by=self.request.user,
|
||||
internal=True,
|
||||
space=self.request.space,
|
||||
)
|
||||
|
||||
for category in file.find_all("span", {"itemprop": "recipeCategory"}):
|
||||
keyword, created = Keyword.objects.get_or_create(name=category.text, space=self.request.space)
|
||||
recipe.keywords.add(keyword)
|
||||
|
||||
try:
|
||||
recipe.servings = parse_servings(file.find("span", {"itemprop": "recipeYield"}).text.strip())
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
prep_time = file.find("span", {"itemprop": "prepTime"}).text.strip().split()
|
||||
prep_time[0] = prep_time[0].replace(',', '.')
|
||||
if prep_time[1].lower() in ['stunde', 'stunden', 'hour', 'hours']:
|
||||
prep_time_min = int(float(prep_time[0]) * 60)
|
||||
elif prep_time[1].lower() in ['tag', 'tage', 'day', 'days']:
|
||||
prep_time_min = int(float(prep_time[0]) * 60 * 24)
|
||||
else:
|
||||
prep_time_min = int(prep_time[0])
|
||||
recipe.waiting_time = prep_time_min
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
cook_time = file.find("span", {"itemprop": "cookTime"}).text.strip().split()
|
||||
cook_time[0] = cook_time[0].replace(',', '.')
|
||||
if cook_time[1].lower() in ['stunde', 'stunden', 'hour', 'hours']:
|
||||
cook_time_min = int(float(cook_time[0]) * 60)
|
||||
elif cook_time[1].lower() in ['tag', 'tage', 'day', 'days']:
|
||||
cook_time_min = int(float(cook_time[0]) * 60 * 24)
|
||||
else:
|
||||
cook_time_min = int(cook_time[0])
|
||||
|
||||
recipe.working_time = cook_time_min
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
for cuisine in file.find_all('span', {'itemprop': 'recipeCuisine'}):
|
||||
cuisine_name = cuisine.text
|
||||
keyword = Keyword.objects.get_or_create(space=self.request.space, name=cuisine_name)
|
||||
if len(keyword):
|
||||
recipe.keywords.add(keyword[0])
|
||||
|
||||
for category in file.find_all('span', {'itemprop': 'recipeCategory'}):
|
||||
category_name = category.text
|
||||
keyword = Keyword.objects.get_or_create(space=self.request.space, name=category_name)
|
||||
if len(keyword):
|
||||
recipe.keywords.add(keyword[0])
|
||||
|
||||
step = Step.objects.create(
|
||||
instruction='',
|
||||
space=self.request.space,
|
||||
show_ingredients_table=self.request.user.userpreference.show_step_ingredients,
|
||||
)
|
||||
|
||||
ingredient_parser = IngredientParser(self.request, True)
|
||||
|
||||
ingredients = file.find("ul", {"class": "ing"})
|
||||
self.get_ingredients_recursive(step, ingredients, ingredient_parser)
|
||||
|
||||
instructions = file.find("div", {"class": "instructions"})
|
||||
if isinstance(instructions, Tag):
|
||||
for instruction in instructions.children:
|
||||
if not isinstance(instruction, Tag) or instruction.text == "":
|
||||
continue
|
||||
if instruction.name == "h3":
|
||||
if step.instruction:
|
||||
step.save()
|
||||
recipe.steps.add(step)
|
||||
step = Step.objects.create(
|
||||
instruction='',
|
||||
space=self.request.space,
|
||||
)
|
||||
|
||||
step.name = instruction.text.strip()[:128]
|
||||
else:
|
||||
if instruction.name == "div":
|
||||
for instruction_step in instruction.children:
|
||||
for br in instruction_step.find_all("br"):
|
||||
br.replace_with("\n")
|
||||
step.instruction += instruction_step.text.strip() + ' \n\n'
|
||||
|
||||
notes = file.find("div", {"class": "modifications"})
|
||||
if notes:
|
||||
for n in notes.children:
|
||||
if n.text == "":
|
||||
continue
|
||||
if n.name == "h3":
|
||||
step.instruction += f'*{n.text.strip()}:* \n\n'
|
||||
else:
|
||||
for br in n.find_all("br"):
|
||||
br.replace_with("\n")
|
||||
|
||||
step.instruction += '*' + n.text.strip() + '* \n\n'
|
||||
|
||||
description = ''
|
||||
try:
|
||||
description = file.find("div", {"id": "description"}).text.strip()
|
||||
except AttributeError:
|
||||
pass
|
||||
if len(description) <= 512:
|
||||
recipe.description = description
|
||||
else:
|
||||
recipe.description = description[:480] + ' ... (full description below)'
|
||||
step.instruction += '*Description:* \n\n*' + description + '* \n\n'
|
||||
|
||||
step.save()
|
||||
recipe.steps.add(step)
|
||||
|
||||
# import the Primary recipe image that is stored in the Zip
|
||||
try:
|
||||
image_path = file.find("img").get("src")
|
||||
image_filename = image_path.split("\\")[1]
|
||||
|
||||
for f in self.import_zip.filelist:
|
||||
zip_file_name = Path(f.filename).name
|
||||
if image_filename == zip_file_name:
|
||||
image_file = self.import_zip.read(f)
|
||||
image_bytes = BytesIO(image_file)
|
||||
self.import_recipe_image(recipe, image_bytes, filetype='.jpeg')
|
||||
break
|
||||
except Exception as e:
|
||||
print(recipe.name, ': failed to import image ', str(e))
|
||||
|
||||
recipe.save()
|
||||
return recipe
|
||||
|
||||
def get_files_from_recipes(self, recipes, el, cookie):
|
||||
raise NotImplementedError('Method not implemented in storage integration')
|
||||
|
||||
def get_file_from_recipe(self, recipe):
|
||||
raise NotImplementedError('Method not implemented in storage integration')
|
||||
@@ -153,6 +153,19 @@ class Integration:
|
||||
il.total_recipes = len(new_file_list)
|
||||
file_list = new_file_list
|
||||
|
||||
if isinstance(self, cookbook.integration.gourmet.Gourmet):
|
||||
self.import_zip = import_zip
|
||||
new_file_list = []
|
||||
for file in file_list:
|
||||
if file.file_size == 0:
|
||||
next
|
||||
if file.filename.startswith("index.htm"):
|
||||
next
|
||||
if file.filename.endswith(".htm"):
|
||||
new_file_list += self.split_recipe_file(BytesIO(import_zip.read(file.filename)))
|
||||
il.total_recipes = len(new_file_list)
|
||||
file_list = new_file_list
|
||||
|
||||
for z in file_list:
|
||||
try:
|
||||
if not hasattr(z, 'filename') or isinstance(z, Tag):
|
||||
|
||||
Reference in New Issue
Block a user