mirror of
https://github.com/TandoorRecipes/recipes.git
synced 2026-01-02 04:39:54 -05:00
@@ -1,189 +1,191 @@
|
||||
import json
|
||||
import re
|
||||
from json import JSONDecodeError
|
||||
from urllib.parse import unquote
|
||||
# import json
|
||||
# import re
|
||||
# from json import JSONDecodeError
|
||||
# from urllib.parse import unquote
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import Tag
|
||||
from recipe_scrapers import scrape_html, scrape_me
|
||||
from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
|
||||
from recipe_scrapers._utils import get_host_name, normalize_string
|
||||
# from bs4 import BeautifulSoup
|
||||
# from bs4.element import Tag
|
||||
# from recipe_scrapers import scrape_html, scrape_me
|
||||
# from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
|
||||
# from recipe_scrapers._utils import get_host_name, normalize_string
|
||||
|
||||
from cookbook.helper import recipe_url_import as helper
|
||||
from cookbook.helper.scrapers.scrapers import text_scraper
|
||||
# from cookbook.helper import recipe_url_import as helper
|
||||
# from cookbook.helper.scrapers.scrapers import text_scraper
|
||||
|
||||
|
||||
def get_recipe_from_source(text, url, request):
|
||||
def build_node(k, v):
|
||||
if isinstance(v, dict):
|
||||
node = {
|
||||
'name': k,
|
||||
'value': k,
|
||||
'children': get_children_dict(v)
|
||||
}
|
||||
elif isinstance(v, list):
|
||||
node = {
|
||||
'name': k,
|
||||
'value': k,
|
||||
'children': get_children_list(v)
|
||||
}
|
||||
else:
|
||||
node = {
|
||||
'name': k + ": " + normalize_string(str(v)),
|
||||
'value': normalize_string(str(v))
|
||||
}
|
||||
return node
|
||||
# def get_recipe_from_source(text, url, request):
|
||||
# def build_node(k, v):
|
||||
# if isinstance(v, dict):
|
||||
# node = {
|
||||
# 'name': k,
|
||||
# 'value': k,
|
||||
# 'children': get_children_dict(v)
|
||||
# }
|
||||
# elif isinstance(v, list):
|
||||
# node = {
|
||||
# 'name': k,
|
||||
# 'value': k,
|
||||
# 'children': get_children_list(v)
|
||||
# }
|
||||
# else:
|
||||
# node = {
|
||||
# 'name': k + ": " + normalize_string(str(v)),
|
||||
# 'value': normalize_string(str(v))
|
||||
# }
|
||||
# return node
|
||||
|
||||
def get_children_dict(children):
|
||||
kid_list = []
|
||||
for k, v in children.items():
|
||||
kid_list.append(build_node(k, v))
|
||||
return kid_list
|
||||
# def get_children_dict(children):
|
||||
# kid_list = []
|
||||
# for k, v in children.items():
|
||||
# kid_list.append(build_node(k, v))
|
||||
# return kid_list
|
||||
|
||||
def get_children_list(children):
|
||||
kid_list = []
|
||||
for kid in children:
|
||||
if type(kid) == list:
|
||||
node = {
|
||||
'name': "unknown list",
|
||||
'value': "unknown list",
|
||||
'children': get_children_list(kid)
|
||||
}
|
||||
kid_list.append(node)
|
||||
elif type(kid) == dict:
|
||||
for k, v in kid.items():
|
||||
kid_list.append(build_node(k, v))
|
||||
else:
|
||||
kid_list.append({
|
||||
'name': normalize_string(str(kid)),
|
||||
'value': normalize_string(str(kid))
|
||||
})
|
||||
return kid_list
|
||||
# def get_children_list(children):
|
||||
# kid_list = []
|
||||
# for kid in children:
|
||||
# if type(kid) == list:
|
||||
# node = {
|
||||
# 'name': "unknown list",
|
||||
# 'value': "unknown list",
|
||||
# 'children': get_children_list(kid)
|
||||
# }
|
||||
# kid_list.append(node)
|
||||
# elif type(kid) == dict:
|
||||
# for k, v in kid.items():
|
||||
# kid_list.append(build_node(k, v))
|
||||
# else:
|
||||
# kid_list.append({
|
||||
# 'name': normalize_string(str(kid)),
|
||||
# 'value': normalize_string(str(kid))
|
||||
# })
|
||||
# return kid_list
|
||||
|
||||
recipe_tree = []
|
||||
parse_list = []
|
||||
soup = BeautifulSoup(text, "html.parser")
|
||||
html_data = get_from_html(soup)
|
||||
images = get_images_from_source(soup, url)
|
||||
text = unquote(text)
|
||||
scrape = None
|
||||
# recipe_tree = []
|
||||
# parse_list = []
|
||||
# soup = BeautifulSoup(text, "html.parser")
|
||||
# html_data = get_from_html(soup)
|
||||
# images = get_images_from_source(soup, url)
|
||||
# text = unquote(text)
|
||||
# scrape = None
|
||||
|
||||
if url:
|
||||
try:
|
||||
scrape = scrape_me(url_path=url, wild_mode=True)
|
||||
except(NoSchemaFoundInWildMode):
|
||||
pass
|
||||
if not scrape:
|
||||
try:
|
||||
parse_list.append(remove_graph(json.loads(text)))
|
||||
if not url and 'url' in parse_list[0]:
|
||||
url = parse_list[0]['url']
|
||||
scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
|
||||
# if url and not text:
|
||||
# try:
|
||||
# scrape = scrape_me(url_path=url, wild_mode=True)
|
||||
# except(NoSchemaFoundInWildMode):
|
||||
# pass
|
||||
|
||||
except JSONDecodeError:
|
||||
for el in soup.find_all('script', type='application/ld+json'):
|
||||
el = remove_graph(el)
|
||||
if not url and 'url' in el:
|
||||
url = el['url']
|
||||
if type(el) == list:
|
||||
for le in el:
|
||||
parse_list.append(le)
|
||||
elif type(el) == dict:
|
||||
parse_list.append(el)
|
||||
for el in soup.find_all(type='application/json'):
|
||||
el = remove_graph(el)
|
||||
if type(el) == list:
|
||||
for le in el:
|
||||
parse_list.append(le)
|
||||
elif type(el) == dict:
|
||||
parse_list.append(el)
|
||||
scrape = text_scraper(text, url=url)
|
||||
# if not scrape:
|
||||
# try:
|
||||
# parse_list.append(remove_graph(json.loads(text)))
|
||||
# if not url and 'url' in parse_list[0]:
|
||||
# url = parse_list[0]['url']
|
||||
# scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
|
||||
|
||||
recipe_json = helper.get_from_scraper(scrape, request)
|
||||
# except JSONDecodeError:
|
||||
# for el in soup.find_all('script', type='application/ld+json'):
|
||||
# el = remove_graph(el)
|
||||
# if not url and 'url' in el:
|
||||
# url = el['url']
|
||||
# if type(el) == list:
|
||||
# for le in el:
|
||||
# parse_list.append(le)
|
||||
# elif type(el) == dict:
|
||||
# parse_list.append(el)
|
||||
# for el in soup.find_all(type='application/json'):
|
||||
# el = remove_graph(el)
|
||||
# if type(el) == list:
|
||||
# for le in el:
|
||||
# parse_list.append(le)
|
||||
# elif type(el) == dict:
|
||||
# parse_list.append(el)
|
||||
# scrape = text_scraper(text, url=url)
|
||||
|
||||
for el in parse_list:
|
||||
temp_tree = []
|
||||
if isinstance(el, Tag):
|
||||
try:
|
||||
el = json.loads(el.string)
|
||||
except TypeError:
|
||||
continue
|
||||
# recipe_json = helper.get_from_scraper(scrape, request)
|
||||
|
||||
for k, v in el.items():
|
||||
if isinstance(v, dict):
|
||||
node = {
|
||||
'name': k,
|
||||
'value': k,
|
||||
'children': get_children_dict(v)
|
||||
}
|
||||
elif isinstance(v, list):
|
||||
node = {
|
||||
'name': k,
|
||||
'value': k,
|
||||
'children': get_children_list(v)
|
||||
}
|
||||
else:
|
||||
node = {
|
||||
'name': k + ": " + normalize_string(str(v)),
|
||||
'value': normalize_string(str(v))
|
||||
}
|
||||
temp_tree.append(node)
|
||||
# # TODO: DEPRECATE recipe_tree & html_data. first validate it isn't used anywhere
|
||||
# for el in parse_list:
|
||||
# temp_tree = []
|
||||
# if isinstance(el, Tag):
|
||||
# try:
|
||||
# el = json.loads(el.string)
|
||||
# except TypeError:
|
||||
# continue
|
||||
|
||||
if '@type' in el and el['@type'] == 'Recipe':
|
||||
recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
|
||||
else:
|
||||
recipe_tree += [{'name': 'json', 'children': temp_tree}]
|
||||
# for k, v in el.items():
|
||||
# if isinstance(v, dict):
|
||||
# node = {
|
||||
# 'name': k,
|
||||
# 'value': k,
|
||||
# 'children': get_children_dict(v)
|
||||
# }
|
||||
# elif isinstance(v, list):
|
||||
# node = {
|
||||
# 'name': k,
|
||||
# 'value': k,
|
||||
# 'children': get_children_list(v)
|
||||
# }
|
||||
# else:
|
||||
# node = {
|
||||
# 'name': k + ": " + normalize_string(str(v)),
|
||||
# 'value': normalize_string(str(v))
|
||||
# }
|
||||
# temp_tree.append(node)
|
||||
|
||||
return recipe_json, recipe_tree, html_data, images
|
||||
# if '@type' in el and el['@type'] == 'Recipe':
|
||||
# recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
|
||||
# else:
|
||||
# recipe_tree += [{'name': 'json', 'children': temp_tree}]
|
||||
|
||||
# return recipe_json, recipe_tree, html_data, images
|
||||
|
||||
|
||||
def get_from_html(soup):
|
||||
INVISIBLE_ELEMS = ('style', 'script', 'head', 'title')
|
||||
html = []
|
||||
for s in soup.strings:
|
||||
if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)):
|
||||
html.append(s)
|
||||
return html
|
||||
# def get_from_html(soup):
|
||||
# INVISIBLE_ELEMS = ('style', 'script', 'head', 'title')
|
||||
# html = []
|
||||
# for s in soup.strings:
|
||||
# if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)):
|
||||
# html.append(s)
|
||||
# return html
|
||||
|
||||
|
||||
def get_images_from_source(soup, url):
|
||||
sources = ['src', 'srcset', 'data-src']
|
||||
images = []
|
||||
img_tags = soup.find_all('img')
|
||||
if url:
|
||||
site = get_host_name(url)
|
||||
prot = url.split(':')[0]
|
||||
# def get_images_from_source(soup, url):
|
||||
# sources = ['src', 'srcset', 'data-src']
|
||||
# images = []
|
||||
# img_tags = soup.find_all('img')
|
||||
# if url:
|
||||
# site = get_host_name(url)
|
||||
# prot = url.split(':')[0]
|
||||
|
||||
urls = []
|
||||
for img in img_tags:
|
||||
for src in sources:
|
||||
try:
|
||||
urls.append(img[src])
|
||||
except KeyError:
|
||||
pass
|
||||
# urls = []
|
||||
# for img in img_tags:
|
||||
# for src in sources:
|
||||
# try:
|
||||
# urls.append(img[src])
|
||||
# except KeyError:
|
||||
# pass
|
||||
|
||||
for u in urls:
|
||||
u = u.split('?')[0]
|
||||
filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
|
||||
if filename:
|
||||
if (('http' not in u) and (url)):
|
||||
# sometimes an image source can be relative
|
||||
# if it is provide the base url
|
||||
u = '{}://{}{}'.format(prot, site, u)
|
||||
if 'http' in u:
|
||||
images.append(u)
|
||||
return images
|
||||
# for u in urls:
|
||||
# u = u.split('?')[0]
|
||||
# filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
|
||||
# if filename:
|
||||
# if (('http' not in u) and (url)):
|
||||
# # sometimes an image source can be relative
|
||||
# # if it is provide the base url
|
||||
# u = '{}://{}{}'.format(prot, site, u)
|
||||
# if 'http' in u:
|
||||
# images.append(u)
|
||||
# return images
|
||||
|
||||
|
||||
def remove_graph(el):
|
||||
# recipes type might be wrapped in @graph type
|
||||
if isinstance(el, Tag):
|
||||
try:
|
||||
el = json.loads(el.string)
|
||||
if '@graph' in el:
|
||||
for x in el['@graph']:
|
||||
if '@type' in x and x['@type'] == 'Recipe':
|
||||
el = x
|
||||
except (TypeError, JSONDecodeError):
|
||||
pass
|
||||
return el
|
||||
# def remove_graph(el):
|
||||
# # recipes type might be wrapped in @graph type
|
||||
# if isinstance(el, Tag):
|
||||
# try:
|
||||
# el = json.loads(el.string)
|
||||
# if '@graph' in el:
|
||||
# for x in el['@graph']:
|
||||
# if '@type' in x and x['@type'] == 'Recipe':
|
||||
# el = x
|
||||
# except (TypeError, JSONDecodeError):
|
||||
# pass
|
||||
# return el
|
||||
|
||||
@@ -1,21 +1,19 @@
|
||||
import random
|
||||
import re
|
||||
from html import unescape
|
||||
|
||||
from pytube import YouTube
|
||||
from unicodedata import decomposition
|
||||
|
||||
from django.utils.dateparse import parse_duration
|
||||
from django.utils.translation import gettext as _
|
||||
from isodate import parse_duration as iso_parse_duration
|
||||
from isodate.isoerror import ISO8601Error
|
||||
from recipe_scrapers._utils import get_minutes
|
||||
from pytube import YouTube
|
||||
from recipe_scrapers._utils import get_host_name, get_minutes
|
||||
|
||||
from cookbook.helper import recipe_url_import as helper
|
||||
from cookbook.helper.ingredient_parser import IngredientParser
|
||||
from cookbook.models import Keyword
|
||||
|
||||
|
||||
# from recipe_scrapers._utils import get_minutes ## temporary until/unless upstream incorporates get_minutes() PR
|
||||
|
||||
|
||||
@@ -369,3 +367,32 @@ def iso_duration_to_minutes(string):
|
||||
string
|
||||
).groupdict()
|
||||
return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)
|
||||
|
||||
|
||||
def get_images_from_soup(soup, url):
|
||||
sources = ['src', 'srcset', 'data-src']
|
||||
images = []
|
||||
img_tags = soup.find_all('img')
|
||||
if url:
|
||||
site = get_host_name(url)
|
||||
prot = url.split(':')[0]
|
||||
|
||||
urls = []
|
||||
for img in img_tags:
|
||||
for src in sources:
|
||||
try:
|
||||
urls.append(img[src])
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
for u in urls:
|
||||
u = u.split('?')[0]
|
||||
filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
|
||||
if filename:
|
||||
if (('http' not in u) and (url)):
|
||||
# sometimes an image source can be relative
|
||||
# if it is provide the base url
|
||||
u = '{}://{}{}'.format(prot, site, u)
|
||||
if 'http' in u:
|
||||
images.append(u)
|
||||
return images
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from json import JSONDecodeError
|
||||
from recipe_scrapers import SCRAPERS
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from recipe_scrapers import SCRAPERS, get_host_name
|
||||
from recipe_scrapers._factory import SchemaScraperFactory
|
||||
from recipe_scrapers._schemaorg import SchemaOrg
|
||||
|
||||
@@ -15,22 +16,28 @@ SCRAPERS.update(CUSTOM_SCRAPERS)
|
||||
|
||||
|
||||
def text_scraper(text, url=None):
|
||||
scraper_class = SchemaScraperFactory.SchemaScraper
|
||||
domain = None
|
||||
if url:
|
||||
domain = get_host_name(url)
|
||||
if domain in SCRAPERS:
|
||||
scraper_class = SCRAPERS[domain]
|
||||
else:
|
||||
scraper_class = SchemaScraperFactory.SchemaScraper
|
||||
|
||||
class TextScraper(scraper_class):
|
||||
def __init__(
|
||||
self,
|
||||
page_data,
|
||||
url=None
|
||||
html=None,
|
||||
url=None,
|
||||
):
|
||||
self.wild_mode = False
|
||||
self.meta_http_equiv = False
|
||||
self.soup = BeautifulSoup(page_data, "html.parser")
|
||||
self.soup = BeautifulSoup(html, "html.parser")
|
||||
self.url = url
|
||||
self.recipe = None
|
||||
try:
|
||||
self.schema = SchemaOrg(page_data)
|
||||
self.schema = SchemaOrg(html)
|
||||
except (JSONDecodeError, AttributeError):
|
||||
pass
|
||||
|
||||
return TextScraper(text, url)
|
||||
return TextScraper(url=url, html=text)
|
||||
|
||||
@@ -10,8 +10,8 @@ import validators
|
||||
import yaml
|
||||
|
||||
from cookbook.helper.ingredient_parser import IngredientParser
|
||||
from cookbook.helper.recipe_html_import import get_recipe_from_source
|
||||
from cookbook.helper.recipe_url_import import iso_duration_to_minutes
|
||||
from cookbook.helper.recipe_url_import import get_images_from_soup, iso_duration_to_minutes
|
||||
from cookbook.helper.scrapers.scrapers import text_scraper
|
||||
from cookbook.integration.integration import Integration
|
||||
from cookbook.models import Ingredient, Keyword, Recipe, Step
|
||||
|
||||
@@ -24,7 +24,10 @@ class CookBookApp(Integration):
|
||||
def get_recipe_from_file(self, file):
|
||||
recipe_html = file.getvalue().decode("utf-8")
|
||||
|
||||
recipe_json, recipe_tree, html_data, images = get_recipe_from_source(recipe_html, 'CookBookApp', self.request)
|
||||
# recipe_json, recipe_tree, html_data, images = get_recipe_from_source(recipe_html, 'CookBookApp', self.request)
|
||||
scrape = text_scraper(text=data)
|
||||
recipe_json = helper.get_from_scraper(scrape, request)
|
||||
images = list(dict.fromkeys(get_images_from_soup(scrape.soup, url)))
|
||||
|
||||
recipe = Recipe.objects.create(
|
||||
name=recipe_json['name'].strip(),
|
||||
|
||||
@@ -3,10 +3,9 @@ from io import BytesIO
|
||||
from zipfile import ZipFile
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from django.utils.translation import gettext as _
|
||||
|
||||
from cookbook.helper.ingredient_parser import IngredientParser
|
||||
from cookbook.helper.recipe_html_import import get_recipe_from_source
|
||||
from cookbook.helper.recipe_url_import import iso_duration_to_minutes, parse_servings
|
||||
from cookbook.integration.integration import Integration
|
||||
from cookbook.models import Ingredient, Keyword, Recipe, Step
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
import traceback
|
||||
from datetime import timedelta, datetime
|
||||
from datetime import datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from gettext import gettext as _
|
||||
from html import escape
|
||||
from smtplib import SMTPException
|
||||
|
||||
from PIL import Image
|
||||
from django.contrib.auth.models import User, Group
|
||||
from django.contrib.auth.models import Group, User
|
||||
from django.core.mail import send_mail
|
||||
from django.db.models import Avg, Q, QuerySet, Sum
|
||||
from django.http import BadHeaderError
|
||||
@@ -14,6 +13,7 @@ from django.urls import reverse
|
||||
from django.utils import timezone
|
||||
from django_scopes import scopes_disabled
|
||||
from drf_writable_nested import UniqueFieldsMixin, WritableNestedModelSerializer
|
||||
from PIL import Image
|
||||
from rest_framework import serializers
|
||||
from rest_framework.exceptions import NotFound, ValidationError
|
||||
|
||||
@@ -22,14 +22,14 @@ from cookbook.helper.HelperFunctions import str2bool
|
||||
from cookbook.helper.permission_helper import above_space_limit
|
||||
from cookbook.helper.shopping_helper import RecipeShoppingEditor
|
||||
from cookbook.models import (Automation, BookmarkletImport, Comment, CookLog, CustomFilter,
|
||||
ExportLog, Food, FoodInheritField, ImportLog, Ingredient, Keyword,
|
||||
MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
|
||||
ExportLog, Food, FoodInheritField, ImportLog, Ingredient, InviteLink,
|
||||
Keyword, MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
|
||||
RecipeBookEntry, RecipeImport, ShareLink, ShoppingList,
|
||||
ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
|
||||
SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
|
||||
UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
|
||||
ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
|
||||
Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
|
||||
SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
|
||||
from cookbook.templatetags.custom_tags import markdown
|
||||
from recipes.settings import MEDIA_URL, AWS_ENABLED
|
||||
from recipes.settings import AWS_ENABLED, MEDIA_URL
|
||||
|
||||
|
||||
class ExtendedRecipeMixin(serializers.ModelSerializer):
|
||||
@@ -193,7 +193,8 @@ class SpaceSerializer(WritableNestedModelSerializer):
|
||||
|
||||
class Meta:
|
||||
model = Space
|
||||
fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
|
||||
fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users',
|
||||
'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
|
||||
read_only_fields = ('id', 'created_by', 'created_at', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo',)
|
||||
|
||||
|
||||
@@ -815,7 +816,7 @@ class RecipeBookEntrySerializer(serializers.ModelSerializer):
|
||||
book = validated_data['book']
|
||||
recipe = validated_data['recipe']
|
||||
if not book.get_owner() == self.context['request'].user and not self.context[
|
||||
'request'].user in book.get_shared():
|
||||
'request'].user in book.get_shared():
|
||||
raise NotFound(detail=None, code=None)
|
||||
obj, created = RecipeBookEntry.objects.get_or_create(book=book, recipe=recipe)
|
||||
return obj
|
||||
@@ -871,11 +872,11 @@ class ShoppingListRecipeSerializer(serializers.ModelSerializer):
|
||||
value = value.quantize(
|
||||
Decimal(1)) if value == value.to_integral() else value.normalize() # strips trailing zero
|
||||
return (
|
||||
obj.name
|
||||
or getattr(obj.mealplan, 'title', None)
|
||||
or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
|
||||
or obj.recipe.name
|
||||
) + f' ({value:.2g})'
|
||||
obj.name
|
||||
or getattr(obj.mealplan, 'title', None)
|
||||
or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
|
||||
or obj.recipe.name
|
||||
) + f' ({value:.2g})'
|
||||
|
||||
def update(self, instance, validated_data):
|
||||
# TODO remove once old shopping list
|
||||
@@ -1232,6 +1233,6 @@ class FoodShoppingUpdateSerializer(serializers.ModelSerializer):
|
||||
# non model serializers
|
||||
|
||||
class RecipeFromSourceSerializer(serializers.Serializer):
|
||||
url = serializers.CharField(max_length=4096, required=False, allow_null=True)
|
||||
url = serializers.CharField(max_length=4096, required=False, allow_null=True, allow_blank=True)
|
||||
data = serializers.CharField(required=False, allow_null=True, allow_blank=True)
|
||||
bookmarklet = serializers.IntegerField(required=False, allow_null=True, )
|
||||
|
||||
@@ -5,20 +5,20 @@ import re
|
||||
import traceback
|
||||
import uuid
|
||||
from collections import OrderedDict
|
||||
from json import JSONDecodeError
|
||||
from urllib.parse import unquote
|
||||
from zipfile import ZipFile
|
||||
|
||||
import requests
|
||||
import validators
|
||||
from PIL import UnidentifiedImageError
|
||||
from annoying.decorators import ajax_request
|
||||
from annoying.functions import get_object_or_None
|
||||
from django.contrib import messages
|
||||
from django.contrib.auth.models import User, Group
|
||||
from django.contrib.auth.models import Group, User
|
||||
from django.contrib.postgres.search import TrigramSimilarity
|
||||
from django.core.exceptions import FieldError, ValidationError
|
||||
from django.core.files import File
|
||||
from django.db.models import (Case, Count, Exists, OuterRef, ProtectedError, Q,
|
||||
Subquery, Value, When)
|
||||
from django.db.models import Case, Count, Exists, OuterRef, ProtectedError, Q, Subquery, Value, When
|
||||
from django.db.models.fields.related import ForeignObjectRel
|
||||
from django.db.models.functions import Coalesce, Lower
|
||||
from django.http import FileResponse, HttpResponse, JsonResponse
|
||||
@@ -27,6 +27,9 @@ from django.urls import reverse
|
||||
from django.utils.translation import gettext as _
|
||||
from django_scopes import scopes_disabled
|
||||
from icalendar import Calendar, Event
|
||||
from PIL import UnidentifiedImageError
|
||||
from recipe_scrapers import scrape_html, scrape_me
|
||||
from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
|
||||
from requests.exceptions import MissingSchema
|
||||
from rest_framework import decorators, status, viewsets
|
||||
from rest_framework.authtoken.models import Token
|
||||
@@ -41,43 +44,47 @@ from rest_framework.throttling import AnonRateThrottle
|
||||
from rest_framework.viewsets import ViewSetMixin
|
||||
from treebeard.exceptions import InvalidMoveToDescendant, InvalidPosition, PathOverflow
|
||||
|
||||
from cookbook.helper import recipe_url_import as helper
|
||||
from cookbook.helper.HelperFunctions import str2bool
|
||||
from cookbook.helper.image_processing import handle_image
|
||||
from cookbook.helper.ingredient_parser import IngredientParser
|
||||
from cookbook.helper.permission_helper import (CustomIsAdmin, CustomIsGuest, CustomIsOwner,
|
||||
CustomIsShare, CustomIsShared, CustomIsUser,
|
||||
group_required, CustomIsSpaceOwner, switch_user_active_space, is_space_owner, CustomIsOwnerReadOnly)
|
||||
from cookbook.helper.recipe_html_import import get_recipe_from_source
|
||||
CustomIsOwnerReadOnly, CustomIsShare, CustomIsShared,
|
||||
CustomIsSpaceOwner, CustomIsUser, group_required,
|
||||
is_space_owner, switch_user_active_space)
|
||||
from cookbook.helper.recipe_search import RecipeFacet, RecipeSearch, old_search
|
||||
from cookbook.helper.recipe_url_import import get_from_youtube_scraper
|
||||
from cookbook.helper.recipe_url_import import get_from_youtube_scraper, get_images_from_soup
|
||||
from cookbook.helper.scrapers.scrapers import text_scraper
|
||||
from cookbook.helper.shopping_helper import RecipeShoppingEditor, shopping_helper
|
||||
from cookbook.models import (Automation, BookmarkletImport, CookLog, CustomFilter, ExportLog, Food,
|
||||
FoodInheritField, ImportLog, Ingredient, Keyword, MealPlan, MealType,
|
||||
Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
|
||||
ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
|
||||
SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
|
||||
UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
|
||||
FoodInheritField, ImportLog, Ingredient, InviteLink, Keyword, MealPlan,
|
||||
MealType, Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
|
||||
ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
|
||||
Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
|
||||
SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
|
||||
from cookbook.provider.dropbox import Dropbox
|
||||
from cookbook.provider.local import Local
|
||||
from cookbook.provider.nextcloud import Nextcloud
|
||||
from cookbook.schemas import FilterSchema, QueryParam, QueryParamAutoSchema, TreeSchema
|
||||
from cookbook.serializer import (AutomationSerializer, BookmarkletImportSerializer,
|
||||
CookLogSerializer, CustomFilterSerializer, ExportLogSerializer,
|
||||
from cookbook.serializer import (AutomationSerializer, BookmarkletImportListSerializer,
|
||||
BookmarkletImportSerializer, CookLogSerializer,
|
||||
CustomFilterSerializer, ExportLogSerializer,
|
||||
FoodInheritFieldSerializer, FoodSerializer,
|
||||
FoodShoppingUpdateSerializer, ImportLogSerializer,
|
||||
IngredientSerializer, KeywordSerializer, MealPlanSerializer,
|
||||
FoodShoppingUpdateSerializer, GroupSerializer, ImportLogSerializer,
|
||||
IngredientSerializer, IngredientSimpleSerializer,
|
||||
InviteLinkSerializer, KeywordSerializer, MealPlanSerializer,
|
||||
MealTypeSerializer, RecipeBookEntrySerializer,
|
||||
RecipeBookSerializer, RecipeImageSerializer,
|
||||
RecipeOverviewSerializer, RecipeSerializer,
|
||||
RecipeBookSerializer, RecipeFromSourceSerializer,
|
||||
RecipeImageSerializer, RecipeOverviewSerializer, RecipeSerializer,
|
||||
RecipeShoppingUpdateSerializer, RecipeSimpleSerializer,
|
||||
ShoppingListAutoSyncSerializer, ShoppingListEntrySerializer,
|
||||
ShoppingListRecipeSerializer, ShoppingListSerializer,
|
||||
StepSerializer, StorageSerializer,
|
||||
SpaceSerializer, StepSerializer, StorageSerializer,
|
||||
SupermarketCategoryRelationSerializer,
|
||||
SupermarketCategorySerializer, SupermarketSerializer,
|
||||
SyncLogSerializer, SyncSerializer, UnitSerializer,
|
||||
UserFileSerializer, UserNameSerializer, UserPreferenceSerializer,
|
||||
ViewLogSerializer, IngredientSimpleSerializer, BookmarkletImportListSerializer, RecipeFromSourceSerializer, SpaceSerializer, UserSpaceSerializer, GroupSerializer, InviteLinkSerializer)
|
||||
UserSpaceSerializer, ViewLogSerializer)
|
||||
from recipes import settings
|
||||
|
||||
|
||||
@@ -713,7 +720,7 @@ class RecipeViewSet(viewsets.ModelViewSet):
|
||||
'Query string matched (fuzzy) against recipe name. In the future also fulltext search.')),
|
||||
QueryParam(name='keywords', description=_(
|
||||
'ID of keyword a recipe should have. For multiple repeat parameter. Equivalent to keywords_or'),
|
||||
qtype='int'),
|
||||
qtype='int'),
|
||||
QueryParam(name='keywords_or',
|
||||
description=_('Keyword IDs, repeat for multiple. Return recipes with any of the keywords'),
|
||||
qtype='int'),
|
||||
@@ -1114,69 +1121,79 @@ def recipe_from_source(request):
|
||||
- url: url to use for importing recipe
|
||||
- data: if no url is given recipe is imported from provided source data
|
||||
- (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
|
||||
:return: JsonResponse containing the parsed json, original html,json and images
|
||||
:return: JsonResponse containing the parsed json and images
|
||||
"""
|
||||
scrape = None
|
||||
serializer = RecipeFromSourceSerializer(data=request.data)
|
||||
if serializer.is_valid():
|
||||
try:
|
||||
if bookmarklet := BookmarkletImport.objects.filter(pk=serializer.validated_data['bookmarklet']).first():
|
||||
serializer.validated_data['url'] = bookmarklet.url
|
||||
serializer.validated_data['data'] = bookmarklet.html
|
||||
bookmarklet.delete()
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
# headers to use for request to external sites
|
||||
external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}
|
||||
if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
|
||||
serializer.validated_data['url'] = bookmarklet.url
|
||||
serializer.validated_data['data'] = bookmarklet.html
|
||||
bookmarklet.delete()
|
||||
|
||||
if not 'url' in serializer.validated_data and not 'data' in serializer.validated_data:
|
||||
url = serializer.validated_data.get('url', None)
|
||||
data = unquote(serializer.validated_data.get('data', None))
|
||||
if not url and not data:
|
||||
return Response({
|
||||
'error': True,
|
||||
'msg': _('Nothing to do.')
|
||||
}, status=status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
# in manual mode request complete page to return it later
|
||||
if 'url' in serializer.validated_data:
|
||||
if re.match('^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', serializer.validated_data['url']):
|
||||
if validators.url(serializer.validated_data['url'], public=True):
|
||||
elif url and not data:
|
||||
if re.match('^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', url):
|
||||
if validators.url(url, public=True):
|
||||
return Response({
|
||||
'recipe_json': get_from_youtube_scraper(serializer.validated_data['url'], request),
|
||||
'recipe_tree': '',
|
||||
'recipe_html': '',
|
||||
'recipe_json': get_from_youtube_scraper(url, request),
|
||||
# 'recipe_tree': '',
|
||||
# 'recipe_html': '',
|
||||
'recipe_images': [],
|
||||
}, status=status.HTTP_200_OK)
|
||||
try:
|
||||
if validators.url(serializer.validated_data['url'], public=True):
|
||||
serializer.validated_data['data'] = requests.get(serializer.validated_data['url'], headers=external_request_headers).content
|
||||
else:
|
||||
else:
|
||||
try:
|
||||
if validators.url(url, public=True):
|
||||
scrape = scrape_me(url_path=url, wild_mode=True)
|
||||
|
||||
else:
|
||||
return Response({
|
||||
'error': True,
|
||||
'msg': _('Invalid Url')
|
||||
}, status=status.HTTP_400_BAD_REQUEST)
|
||||
except NoSchemaFoundInWildMode:
|
||||
pass
|
||||
except requests.exceptions.ConnectionError:
|
||||
return Response({
|
||||
'error': True,
|
||||
'msg': _('Invalid Url')
|
||||
'msg': _('Connection Refused.')
|
||||
}, status=status.HTTP_400_BAD_REQUEST)
|
||||
except requests.exceptions.ConnectionError:
|
||||
return Response({
|
||||
'error': True,
|
||||
'msg': _('Connection Refused.')
|
||||
}, status=status.HTTP_400_BAD_REQUEST)
|
||||
except requests.exceptions.MissingSchema:
|
||||
return Response({
|
||||
'error': True,
|
||||
'msg': _('Bad URL Schema.')
|
||||
}, status=status.HTTP_400_BAD_REQUEST)
|
||||
except requests.exceptions.MissingSchema:
|
||||
return Response({
|
||||
'error': True,
|
||||
'msg': _('Bad URL Schema.')
|
||||
}, status=status.HTTP_400_BAD_REQUEST)
|
||||
else:
|
||||
try:
|
||||
json.loads(data)
|
||||
data = "<script type='application/ld+json'>" + data + "</script>"
|
||||
except JSONDecodeError:
|
||||
pass
|
||||
scrape = text_scraper(text=data, url=url)
|
||||
if not url and (found_url := scrape.schema.data.get('url', None)):
|
||||
scrape = text_scraper(text=data, url=found_url)
|
||||
|
||||
recipe_json, recipe_tree, recipe_html, recipe_images = get_recipe_from_source(serializer.validated_data['data'], serializer.validated_data['url'], request)
|
||||
if len(recipe_tree) == 0 and len(recipe_json) == 0:
|
||||
if scrape:
|
||||
return Response({
|
||||
'recipe_json': helper.get_from_scraper(scrape, request),
|
||||
# 'recipe_tree': recipe_tree,
|
||||
# 'recipe_html': recipe_html,
|
||||
'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, url))),
|
||||
}, status=status.HTTP_200_OK)
|
||||
|
||||
else:
|
||||
return Response({
|
||||
'error': True,
|
||||
'msg': _('No usable data could be found.')
|
||||
}, status=status.HTTP_400_BAD_REQUEST)
|
||||
else:
|
||||
return Response({
|
||||
'recipe_json': recipe_json,
|
||||
'recipe_tree': recipe_tree,
|
||||
'recipe_html': recipe_html,
|
||||
'recipe_images': list(dict.fromkeys(recipe_images)),
|
||||
}, status=status.HTTP_200_OK)
|
||||
else:
|
||||
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user