basic importing working

This commit is contained in:
vabene1111
2020-06-23 10:34:04 +02:00
parent dc91e1e8ed
commit 8594346488
4 changed files with 139 additions and 33 deletions

View File

@@ -1,6 +1,7 @@
import re
from django.http import JsonResponse
from django.utils.dateparse import parse_datetime, parse_duration
from cookbook.models import Keyword
@@ -8,6 +9,12 @@ from cookbook.models import Keyword
def find_recipe_json(ld_json):
ld_json['org'] = str(ld_json)
if type(ld_json['name']) == list:
try:
ld_json['name'] = ld_json['name'][0]
except:
ld_json['name'] = 'ERROR'
# some sites use ingredients instead of recipeIngredients
if 'recipeIngredient' not in ld_json and 'ingredients' in ld_json:
ld_json['recipeIngredient'] = ld_json['ingredients']
@@ -22,13 +29,21 @@ def find_recipe_json(ld_json):
for x in ld_json['recipeIngredient']:
ingredient_split = x.split()
if len(ingredient_split) > 2:
ingredients.append({'amount': ingredient_split[0], 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])})
try:
ingredients.append({'amount': float(ingredient_split[0].replace(',', '.')), 'unit': ingredient_split[1], 'ingredient': " ".join(ingredient_split[2:])})
except ValueError:
ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)})
if len(ingredient_split) == 2:
ingredients.append({'amount': ingredient_split[0], 'unit': '', 'ingredient': " ".join(ingredient_split[1:])})
try:
ingredients.append({'amount': float(ingredient_split[0].replace(',', '.')), 'unit': '', 'ingredient': " ".join(ingredient_split[1:])})
except ValueError:
ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)})
if len(ingredient_split) == 1:
ingredients.append({'amount': 0, 'unit': '', 'ingredient': " ".join(ingredient_split)})
ld_json['recipeIngredient'] = ingredients
else:
ld_json['recipeIngredient'] = []
if 'keywords' in ld_json:
keywords = []
@@ -49,6 +64,8 @@ def find_recipe_json(ld_json):
keywords.append({'id': "null", 'text': kw.strip()})
ld_json['keywords'] = keywords
else:
ld_json['keywords'] = []
if 'recipeInstructions' in ld_json:
instructions = ''
@@ -66,6 +83,8 @@ def find_recipe_json(ld_json):
ld_json['recipeInstructions'] = re.sub(' +', ' ', ld_json['recipeInstructions'])
ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('<p>', '')
ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('</p>', '')
else:
ld_json['recipeInstructions'] = ''
if 'image' in ld_json:
# check if list of images is returned, take first if so
@@ -79,4 +98,14 @@ def find_recipe_json(ld_json):
if 'http' not in ld_json['image']:
ld_json['image'] = ''
if 'cookTime' in ld_json:
if type(ld_json['cookTime']) == list and len(ld_json['cookTime']) > 0:
ld_json['cookTime'] = ld_json['cookTime'][0]
ld_json['cookTime'] = round(parse_duration(ld_json['cookTime']).seconds/60)
if 'prepTime' in ld_json:
if type(ld_json['prepTime']) == list and len(ld_json['prepTime']) > 0:
ld_json['prepTime'] = ld_json['prepTime'][0]
ld_json['prepTime'] = round(parse_duration(ld_json['prepTime']).seconds/60)
return JsonResponse(ld_json)

View File

@@ -22,7 +22,7 @@
<div class="input-group mb-3">
<input class="form-control" v-model="remote_url">
<div class="input-group-append">
<button @click="loadRecipe()" class="btn btn-primary" type="button"
<button @click="loadRecipe()" class="btn btn-primary shadow-none" type="button"
id="id_btn_search"><i class="fas fa-search"></i>
</button>
</div>
@@ -127,6 +127,43 @@
</form>
</template>
<template v-if="error !== undefined">
<div>
<div style="text-align: center">
<i class="fas fa-robot fa-8x"></i><br/><br/>
[[error.msg]]
</div>
</div>
<br/>
<div class="row">
<div class="col-md-8 offset-md-2">
<div class="card border-info mb-6">
<div class="card-body text-info">
<h5 class="card-title">{% trans 'Information' %}</h5>
<p class="card-text">
{% blocktrans %} Only websites containing ld+json or microdata information can currently
be imported. Most big recipe pages support this. If you site cannot be imported but
you think
it probably has some kind of structured data feel free to post an example in the
github issues.{% endblocktrans %}
</p>
<a href="https://developers.google.com/search/docs/data-types/recipe" target="_blank"
rel="noreferrer nofollow"
class="card-link">{% trans 'Google ld+json Info' %}</a>
<a href="https://github.com/vabene1111/recipes/issues" target="_blank"
rel="noreferrer nofollow"
class="card-link">{% trans 'GitHub Issues' %}</a>
<a href="https://schema.org/Recipe" target="_blank" rel="noreferrer nofollow"
class="card-link">{% trans 'Recipe Markup Specification' %}</a>
</div>
</div>
</div>
</div>
</template>
</div>
<script type="application/javascript">
@@ -142,9 +179,10 @@
delimiters: ['[[', ']]'],
el: '#app',
data: {
remote_url: 'https://www.chefkoch.de/rezepte/1716851280413039/Einfacher-Zwiebelkuchen.html',
remote_url: 'https://www.rezeptschachtel.de/schwarzwaelder_kirschtorte_rezept.html',
keywords: [],
recipe_data: undefined,
error: undefined,
loading: false,
all_keywords: false,
},
@@ -155,42 +193,24 @@
methods: {
loadRecipe: function () {
this.recipe_data = undefined
this.error = undefined
this.loading = true
this.$http.get("{% url 'api_recipe_from_url' 12345 %}".replace(/12345/, this.remote_url)).then((response) => {
this.recipe_data = response.data;
this.loading = false
}).catch((err) => {
this.error = err.data
this.loading = false
console.log(err)
})
},
importRecipe: function () {
let recipe_keywords = []
for (k of this.recipe_data.keywords) {
if (k.id !== "null") {
recipe_keywords.push(Number.parseInt(k.id))
}
//TODO create non existent if checked
}
let recipe = {
name: this.recipe_data.name,
instructions: this.recipe_data.recipeInstructions,
keywords: recipe_keywords,
created_by: {{ request.user.pk }},
}
this.$http.post(`{% url 'api:recipe-list' %}`, recipe).then((response) => {
let entry = response.data
console.log(entry)
//TODO create some kind of endpoint for ingredients, units and recipe ingredients creation
location.href = "{% url 'view_recipe' 12345 %}".replace(/12345/, entry.id)
this.$set(this.recipe_data, 'all_keywords', this.all_keywords)
this.$http.post(`{% url 'data_import_url' %}`, this.recipe_data).then((response) => {
location.href = response.data
}).catch((err) => {
console.log("dragChanged create error", err);
})
},
getKeywords: function () {
this.$http.get("{% url 'dal_keyword' %}").then((response) => {

View File

@@ -18,6 +18,7 @@ from icalendar import Calendar, Event
from rest_framework import viewsets, permissions
from rest_framework.exceptions import APIException
from rest_framework.mixins import RetrieveModelMixin, UpdateModelMixin, ListModelMixin
from urllib3.exceptions import NewConnectionError
from cookbook.helper.permission_helper import group_required, CustomIsOwner, CustomIsAdmin, CustomIsUser
from cookbook.helper.recipe_url_import import find_recipe_json
@@ -264,10 +265,14 @@ def get_plan_ical(request, html_week):
@group_required('user')
def recipe_from_url(request, url):
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
response = requests.get(url, headers=headers)
try:
response = requests.get(url, headers=headers)
except requests.exceptions.ConnectionError:
return JsonResponse({'error': True, 'msg': _('The requested page could not be found.')}, status=400)
if response.status_code == 403:
return JsonResponse({'error': _('The requested page refused to provide any information (Status Code 403).')})
return JsonResponse({'error': True, 'msg': _('The requested page refused to provide any information (Status Code 403).')}, status=400)
soup = BeautifulSoup(response.text, "html.parser")
@@ -288,7 +293,7 @@ def recipe_from_url(request, url):
if '@type' in ld_json_item and ld_json_item['@type'] == 'Recipe':
return find_recipe_json(ld_json_item)
except JSONDecodeError:
JsonResponse({'error': _('The requested site does not provided malformed data and cannot be read.')})
JsonResponse({'error': True, 'msg': _('The requested site does not provided malformed data and cannot be read.')}, status=400)
# now try to find microdata
items = microdata.get_items(response.text)
@@ -297,4 +302,4 @@ def recipe_from_url(request, url):
if 'schema.org/Recipe' in str(md_json['type']):
return find_recipe_json(md_json['properties'])
return JsonResponse({'error': _('The requested site does not provide any recognized data format to import the recipe from.')})
return JsonResponse({'error': True, 'msg': _('The requested site does not provide any recognized data format to import the recipe from.')}, status=400)

View File

@@ -1,8 +1,13 @@
import json
from datetime import datetime
from io import BytesIO
import requests
from PIL import Image
from django.contrib import messages
from django.core.files import File
from django.utils.translation import gettext as _
from django.http import HttpResponseRedirect
from django.http import HttpResponseRedirect, HttpResponse
from django.shortcuts import redirect, render
from django.urls import reverse
from django.utils.translation import ngettext
@@ -90,6 +95,53 @@ def batch_edit(request):
@group_required('user')
def import_url(request):
    """Render the URL import page or, on POST, persist an imported recipe.

    On POST the request body must be a JSON object with the keys read
    below: ``name``, ``recipeInstructions``, ``keywords`` (a list of
    ``{'id', 'text'}`` dicts) and ``recipeIngredient`` (a list of
    ``{'amount', 'unit', 'ingredient'}`` dicts). ``image`` (a URL) and
    ``all_keywords`` (a flag) are optional.

    Returns:
        For POST: an ``HttpResponse`` whose body is the URL of the newly
        created recipe's view page, or an empty 400 response when the body
        is not valid JSON. For any other method: the rendered
        ``url_import.html`` template.
    """
    # Local import so the block does not rely on a module-level ``uuid``
    # import; harmless if the module already imports it.
    import uuid

    if request.method == 'POST':
        try:
            data = json.loads(request.body)
        except ValueError:
            # json.JSONDecodeError (and a body that is not valid UTF-8) are
            # ValueError subclasses: reject instead of failing with a 500.
            return HttpResponse(status=400)

        recipe = Recipe.objects.create(
            name=data['name'],
            instructions=data['recipeInstructions'],
            internal=True,
            created_by=request.user,
        )

        create_missing_keywords = data.get('all_keywords', False)
        for kw in data['keywords']:
            # An id of "null" marks a keyword that has no database entry yet.
            if kw['id'] != "null" and (k := Keyword.objects.filter(id=kw['id']).first()):
                recipe.keywords.add(k)
            elif create_missing_keywords:
                # get_or_create avoids a duplicate when the name already exists.
                k, k_created = Keyword.objects.get_or_create(name=kw['text'])
                recipe.keywords.add(k)

        for ing in data['recipeIngredient']:
            i, i_created = Ingredient.objects.get_or_create(name=ing['ingredient'])
            u, u_created = Unit.objects.get_or_create(name=ing['unit'])

            amount = ing['amount']
            if isinstance(amount, str):
                try:
                    # accept a decimal comma ("1,5") as well as a decimal point
                    amount = float(amount.replace(',', '.'))
                except ValueError:
                    # Fall back to 0 rather than handing an unparseable
                    # string to the model.
                    amount = 0

            RecipeIngredient.objects.create(recipe=recipe, ingredient=i, unit=u, amount=amount)

        image_url = data.get('image')
        if image_url:
            # The image is optional decoration: a failed download or an
            # undecodable file must not abort an import whose recipe row
            # has already been written.
            try:
                response = requests.get(image_url, timeout=10)
                response.raise_for_status()
                img = Image.open(BytesIO(response.content))

                # todo move image processing to dedicated function
                # scale to a fixed width of 720px, keeping the aspect ratio
                basewidth = 720
                wpercent = basewidth / float(img.size[0])
                hsize = max(1, int(float(img.size[1]) * wpercent))
                # LANCZOS is the filter the deprecated ANTIALIAS alias referred to
                img = img.resize((basewidth, hsize), Image.LANCZOS)

                im_io = BytesIO()
                img.save(im_io, 'PNG', quality=70)
            except (requests.exceptions.RequestException, OSError):
                # Network errors, non-2xx responses and unreadable image
                # data all end up here; the recipe is kept without an image.
                pass
            else:
                recipe.image = File(im_io, name=f'{uuid.uuid4()}_{recipe.pk}.png')
                recipe.save()

        return HttpResponse(reverse('view_recipe', args=[recipe.pk]))

    return render(request, 'url_import.html', {})