mirror of
https://github.com/TandoorRecipes/recipes.git
synced 2026-01-01 12:18:45 -05:00
helper/recipe_url_import
This commit is contained in:
@@ -1,18 +1,16 @@
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import unicodedata
|
||||
from json import JSONDecodeError
|
||||
|
||||
import microdata
|
||||
from bs4 import BeautifulSoup
|
||||
from cookbook.helper.ingredient_parser import parse as parse_ingredient
|
||||
from cookbook.models import Keyword
|
||||
from django.http import JsonResponse
|
||||
from django.utils.dateparse import parse_duration
|
||||
from django.utils.translation import gettext as _
|
||||
|
||||
from cookbook.models import Keyword
|
||||
from cookbook.helper.ingredient_parser import parse as parse_ingredient
|
||||
|
||||
|
||||
def get_from_html(html_text, url):
|
||||
soup = BeautifulSoup(html_text, "html.parser")
|
||||
@@ -31,10 +29,16 @@ def get_from_html(html_text, url):
|
||||
if '@type' in x and x['@type'] == 'Recipe':
|
||||
ld_json_item = x
|
||||
|
||||
if '@type' in ld_json_item and ld_json_item['@type'] == 'Recipe':
|
||||
if ('@type' in ld_json_item
|
||||
and ld_json_item['@type'] == 'Recipe'):
|
||||
return find_recipe_json(ld_json_item, url)
|
||||
except JSONDecodeError as e:
|
||||
return JsonResponse({'error': True, 'msg': _('The requested site provided malformed data and cannot be read.')}, status=400)
|
||||
except JSONDecodeError:
|
||||
return JsonResponse(
|
||||
{
|
||||
'error': True,
|
||||
'msg': _('The requested site provided malformed data and cannot be read.') # noqa: E501
|
||||
},
|
||||
status=400)
|
||||
|
||||
# now try to find microdata
|
||||
items = microdata.get_items(html_text)
|
||||
@@ -43,14 +47,19 @@ def get_from_html(html_text, url):
|
||||
if 'schema.org/Recipe' in str(md_json['type']):
|
||||
return find_recipe_json(md_json['properties'], url)
|
||||
|
||||
return JsonResponse({'error': True, 'msg': _('The requested site does not provide any recognized data format to import the recipe from.')}, status=400)
|
||||
return JsonResponse(
|
||||
{
|
||||
'error': True,
|
||||
'msg': _('The requested site does not provide any recognized data format to import the recipe from.') # noqa: E501
|
||||
},
|
||||
status=400)
|
||||
|
||||
|
||||
def find_recipe_json(ld_json, url):
|
||||
if type(ld_json['name']) == list:
|
||||
try:
|
||||
ld_json['name'] = ld_json['name'][0]
|
||||
except:
|
||||
except Exception:
|
||||
ld_json['name'] = 'ERROR'
|
||||
|
||||
# some sites use ingredients instead of recipeIngredients
|
||||
@@ -59,8 +68,9 @@ def find_recipe_json(ld_json, url):
|
||||
|
||||
if 'recipeIngredient' in ld_json:
|
||||
# some pages have comma separated ingredients in a single array entry
|
||||
if len(ld_json['recipeIngredient']) == 1 and len(ld_json['recipeIngredient'][0]) > 30:
|
||||
ld_json['recipeIngredient'] = ld_json['recipeIngredient'][0].split(',')
|
||||
if (len(ld_json['recipeIngredient']) == 1
|
||||
and len(ld_json['recipeIngredient'][0]) > 30):
|
||||
ld_json['recipeIngredient'] = ld_json['recipeIngredient'][0].split(',') # noqa: E501
|
||||
|
||||
for x in ld_json['recipeIngredient']:
|
||||
if '\n' in x:
|
||||
@@ -71,13 +81,41 @@ def find_recipe_json(ld_json, url):
|
||||
ingredients = []
|
||||
|
||||
for x in ld_json['recipeIngredient']:
|
||||
if x.replace(' ','') != '':
|
||||
if x.replace(' ', '') != '':
|
||||
try:
|
||||
amount, unit, ingredient, note = parse_ingredient(x)
|
||||
if ingredient:
|
||||
ingredients.append({'amount': amount, 'unit': {'text': unit, 'id': random.randrange(10000, 99999)}, 'ingredient': {'text': ingredient, 'id': random.randrange(10000, 99999)}, "note": note, 'original': x})
|
||||
except:
|
||||
ingredients.append({'amount': 0, 'unit': {'text': "", 'id': random.randrange(10000, 99999)}, 'ingredient': {'text': x, 'id': random.randrange(10000, 99999)}, "note": "", 'original': x})
|
||||
ingredients.append(
|
||||
{
|
||||
'amount': amount,
|
||||
'unit': {
|
||||
'text': unit,
|
||||
'id': random.randrange(10000, 99999)
|
||||
},
|
||||
'ingredient': {
|
||||
'text': ingredient,
|
||||
'id': random.randrange(10000, 99999)
|
||||
},
|
||||
'note': note,
|
||||
'original': x
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
ingredients.append(
|
||||
{
|
||||
'amount': 0,
|
||||
'unit': {
|
||||
'text': '',
|
||||
'id': random.randrange(10000, 99999)
|
||||
},
|
||||
'ingredient': {
|
||||
'text': x,
|
||||
'id': random.randrange(10000, 99999)
|
||||
},
|
||||
'note': '',
|
||||
'original': x
|
||||
}
|
||||
)
|
||||
|
||||
ld_json['recipeIngredient'] = ingredients
|
||||
else:
|
||||
@@ -91,7 +129,9 @@ def find_recipe_json(ld_json, url):
|
||||
ld_json['keywords'] = ld_json['keywords'].split(',')
|
||||
|
||||
# keywords as string in list
|
||||
if type(ld_json['keywords']) == list and len(ld_json['keywords']) == 1 and ',' in ld_json['keywords'][0]:
|
||||
if (type(ld_json['keywords']) == list
|
||||
and len(ld_json['keywords']) == 1
|
||||
and ',' in ld_json['keywords'][0]):
|
||||
ld_json['keywords'] = ld_json['keywords'][0].split(',')
|
||||
|
||||
# keywords as list
|
||||
@@ -126,10 +166,10 @@ def find_recipe_json(ld_json, url):
|
||||
instructions += str(i)
|
||||
ld_json['recipeInstructions'] = instructions
|
||||
|
||||
ld_json['recipeInstructions'] = re.sub(r'\n\s*\n', '\n\n', ld_json['recipeInstructions'])
|
||||
ld_json['recipeInstructions'] = re.sub(' +', ' ', ld_json['recipeInstructions'])
|
||||
ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('<p>', '')
|
||||
ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('</p>', '')
|
||||
ld_json['recipeInstructions'] = re.sub(r'\n\s*\n', '\n\n', ld_json['recipeInstructions']) # noqa: E501
|
||||
ld_json['recipeInstructions'] = re.sub(' +', ' ', ld_json['recipeInstructions']) # noqa: E501
|
||||
ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('<p>', '') # noqa: E501
|
||||
ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('</p>', '') # noqa: E501
|
||||
else:
|
||||
ld_json['recipeInstructions'] = ''
|
||||
|
||||
@@ -149,9 +189,14 @@ def find_recipe_json(ld_json, url):
|
||||
|
||||
if 'cookTime' in ld_json:
|
||||
try:
|
||||
if type(ld_json['cookTime']) == list and len(ld_json['cookTime']) > 0:
|
||||
if (type(ld_json['cookTime']) == list
|
||||
and len(ld_json['cookTime']) > 0):
|
||||
ld_json['cookTime'] = ld_json['cookTime'][0]
|
||||
ld_json['cookTime'] = round(parse_duration(ld_json['cookTime']).seconds / 60)
|
||||
ld_json['cookTime'] = round(
|
||||
parse_duration(
|
||||
ld_json['cookTime']
|
||||
).seconds / 60
|
||||
)
|
||||
except TypeError:
|
||||
ld_json['cookTime'] = 0
|
||||
else:
|
||||
@@ -159,16 +204,24 @@ def find_recipe_json(ld_json, url):
|
||||
|
||||
if 'prepTime' in ld_json:
|
||||
try:
|
||||
if type(ld_json['prepTime']) == list and len(ld_json['prepTime']) > 0:
|
||||
if (type(ld_json['prepTime']) == list
|
||||
and len(ld_json['prepTime']) > 0):
|
||||
ld_json['prepTime'] = ld_json['prepTime'][0]
|
||||
ld_json['prepTime'] = round(parse_duration(ld_json['prepTime']).seconds / 60)
|
||||
ld_json['prepTime'] = round(
|
||||
parse_duration(
|
||||
ld_json['prepTime']
|
||||
).seconds / 60
|
||||
)
|
||||
except TypeError:
|
||||
ld_json['prepTime'] = 0
|
||||
else:
|
||||
ld_json['prepTime'] = 0
|
||||
|
||||
for key in list(ld_json):
|
||||
if key not in ['prepTime', 'cookTime', 'image', 'recipeInstructions', 'keywords', 'name', 'recipeIngredient']:
|
||||
if key not in [
|
||||
'prepTime', 'cookTime', 'image', 'recipeInstructions',
|
||||
'keywords', 'name', 'recipeIngredient'
|
||||
]:
|
||||
ld_json.pop(key, None)
|
||||
|
||||
return JsonResponse(ld_json)
|
||||
|
||||
Reference in New Issue
Block a user