Merge pull request #1917 from smilerz/bookmarklet_fix

Bookmarklet fix
Merged by vabene1111 on 2022-07-11 14:28:08 +02:00, committed via GitHub
8 changed files with 327 additions and 271 deletions

View File

@@ -1,189 +1,191 @@
-import json
-import re
-from json import JSONDecodeError
-from urllib.parse import unquote
-
-from bs4 import BeautifulSoup
-from bs4.element import Tag
-from recipe_scrapers import scrape_html, scrape_me
-from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
-from recipe_scrapers._utils import get_host_name, normalize_string
-
-from cookbook.helper import recipe_url_import as helper
-from cookbook.helper.scrapers.scrapers import text_scraper
-
-
-def get_recipe_from_source(text, url, request):
-    def build_node(k, v):
-        if isinstance(v, dict):
-            node = {
-                'name': k,
-                'value': k,
-                'children': get_children_dict(v)
-            }
-        elif isinstance(v, list):
-            node = {
-                'name': k,
-                'value': k,
-                'children': get_children_list(v)
-            }
-        else:
-            node = {
-                'name': k + ": " + normalize_string(str(v)),
-                'value': normalize_string(str(v))
-            }
-        return node
-
-    def get_children_dict(children):
-        kid_list = []
-        for k, v in children.items():
-            kid_list.append(build_node(k, v))
-        return kid_list
-
-    def get_children_list(children):
-        kid_list = []
-        for kid in children:
-            if type(kid) == list:
-                node = {
-                    'name': "unknown list",
-                    'value': "unknown list",
-                    'children': get_children_list(kid)
-                }
-                kid_list.append(node)
-            elif type(kid) == dict:
-                for k, v in kid.items():
-                    kid_list.append(build_node(k, v))
-            else:
-                kid_list.append({
-                    'name': normalize_string(str(kid)),
-                    'value': normalize_string(str(kid))
-                })
-        return kid_list
-
-    recipe_tree = []
-    parse_list = []
-    soup = BeautifulSoup(text, "html.parser")
-    html_data = get_from_html(soup)
-    images = get_images_from_source(soup, url)
-    text = unquote(text)
-    scrape = None
-    if url:
-        try:
-            scrape = scrape_me(url_path=url, wild_mode=True)
-        except(NoSchemaFoundInWildMode):
-            pass
-    if not scrape:
-        try:
-            parse_list.append(remove_graph(json.loads(text)))
-            if not url and 'url' in parse_list[0]:
-                url = parse_list[0]['url']
-            scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
-        except JSONDecodeError:
-            for el in soup.find_all('script', type='application/ld+json'):
-                el = remove_graph(el)
-                if not url and 'url' in el:
-                    url = el['url']
-                if type(el) == list:
-                    for le in el:
-                        parse_list.append(le)
-                elif type(el) == dict:
-                    parse_list.append(el)
-            for el in soup.find_all(type='application/json'):
-                el = remove_graph(el)
-                if type(el) == list:
-                    for le in el:
-                        parse_list.append(le)
-                elif type(el) == dict:
-                    parse_list.append(el)
-            scrape = text_scraper(text, url=url)
-
-    recipe_json = helper.get_from_scraper(scrape, request)
-
-    for el in parse_list:
-        temp_tree = []
-        if isinstance(el, Tag):
-            try:
-                el = json.loads(el.string)
-            except TypeError:
-                continue
-
-        for k, v in el.items():
-            if isinstance(v, dict):
-                node = {
-                    'name': k,
-                    'value': k,
-                    'children': get_children_dict(v)
-                }
-            elif isinstance(v, list):
-                node = {
-                    'name': k,
-                    'value': k,
-                    'children': get_children_list(v)
-                }
-            else:
-                node = {
-                    'name': k + ": " + normalize_string(str(v)),
-                    'value': normalize_string(str(v))
-                }
-            temp_tree.append(node)
-
-        if '@type' in el and el['@type'] == 'Recipe':
-            recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
-        else:
-            recipe_tree += [{'name': 'json', 'children': temp_tree}]
-
-    return recipe_json, recipe_tree, html_data, images
-
-
-def get_from_html(soup):
-    INVISIBLE_ELEMS = ('style', 'script', 'head', 'title')
-    html = []
-    for s in soup.strings:
-        if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)):
-            html.append(s)
-    return html
-
-
-def get_images_from_source(soup, url):
-    sources = ['src', 'srcset', 'data-src']
-    images = []
-    img_tags = soup.find_all('img')
-    if url:
-        site = get_host_name(url)
-        prot = url.split(':')[0]
-    urls = []
-    for img in img_tags:
-        for src in sources:
-            try:
-                urls.append(img[src])
-            except KeyError:
-                pass
-    for u in urls:
-        u = u.split('?')[0]
-        filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
-        if filename:
-            if (('http' not in u) and (url)):
-                # sometimes an image source can be relative
-                # if it is provide the base url
-                u = '{}://{}{}'.format(prot, site, u)
-            if 'http' in u:
-                images.append(u)
-    return images
-
-
-def remove_graph(el):
-    # recipes type might be wrapped in @graph type
-    if isinstance(el, Tag):
-        try:
-            el = json.loads(el.string)
-            if '@graph' in el:
-                for x in el['@graph']:
-                    if '@type' in x and x['@type'] == 'Recipe':
-                        el = x
-        except (TypeError, JSONDecodeError):
-            pass
-    return el
+# import json
+# import re
+# from json import JSONDecodeError
+# from urllib.parse import unquote
+
+# from bs4 import BeautifulSoup
+# from bs4.element import Tag
+# from recipe_scrapers import scrape_html, scrape_me
+# from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
+# from recipe_scrapers._utils import get_host_name, normalize_string
+
+# from cookbook.helper import recipe_url_import as helper
+# from cookbook.helper.scrapers.scrapers import text_scraper
+
+
+# def get_recipe_from_source(text, url, request):
+#     def build_node(k, v):
+#         if isinstance(v, dict):
+#             node = {
+#                 'name': k,
+#                 'value': k,
+#                 'children': get_children_dict(v)
+#             }
+#         elif isinstance(v, list):
+#             node = {
+#                 'name': k,
+#                 'value': k,
+#                 'children': get_children_list(v)
+#             }
+#         else:
+#             node = {
+#                 'name': k + ": " + normalize_string(str(v)),
+#                 'value': normalize_string(str(v))
+#             }
+#         return node
+
+#     def get_children_dict(children):
+#         kid_list = []
+#         for k, v in children.items():
+#             kid_list.append(build_node(k, v))
+#         return kid_list
+
+#     def get_children_list(children):
+#         kid_list = []
+#         for kid in children:
+#             if type(kid) == list:
+#                 node = {
+#                     'name': "unknown list",
+#                     'value': "unknown list",
+#                     'children': get_children_list(kid)
+#                 }
+#                 kid_list.append(node)
+#             elif type(kid) == dict:
+#                 for k, v in kid.items():
+#                     kid_list.append(build_node(k, v))
+#             else:
+#                 kid_list.append({
+#                     'name': normalize_string(str(kid)),
+#                     'value': normalize_string(str(kid))
+#                 })
+#         return kid_list
+
+#     recipe_tree = []
+#     parse_list = []
+#     soup = BeautifulSoup(text, "html.parser")
+#     html_data = get_from_html(soup)
+#     images = get_images_from_source(soup, url)
+#     text = unquote(text)
+#     scrape = None
+#     if url and not text:
+#         try:
+#             scrape = scrape_me(url_path=url, wild_mode=True)
+#         except(NoSchemaFoundInWildMode):
+#             pass
+#     if not scrape:
+#         try:
+#             parse_list.append(remove_graph(json.loads(text)))
+#             if not url and 'url' in parse_list[0]:
+#                 url = parse_list[0]['url']
+#             scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
+#         except JSONDecodeError:
+#             for el in soup.find_all('script', type='application/ld+json'):
+#                 el = remove_graph(el)
+#                 if not url and 'url' in el:
+#                     url = el['url']
+#                 if type(el) == list:
+#                     for le in el:
+#                         parse_list.append(le)
+#                 elif type(el) == dict:
+#                     parse_list.append(el)
+#             for el in soup.find_all(type='application/json'):
+#                 el = remove_graph(el)
+#                 if type(el) == list:
+#                     for le in el:
+#                         parse_list.append(le)
+#                 elif type(el) == dict:
+#                     parse_list.append(el)
+#             scrape = text_scraper(text, url=url)
+
+#     recipe_json = helper.get_from_scraper(scrape, request)
+
+# # TODO: DEPRECATE recipe_tree & html_data. first validate it isn't used anywhere
+#     for el in parse_list:
+#         temp_tree = []
+#         if isinstance(el, Tag):
+#             try:
+#                 el = json.loads(el.string)
+#             except TypeError:
+#                 continue
+
+#         for k, v in el.items():
+#             if isinstance(v, dict):
+#                 node = {
+#                     'name': k,
+#                     'value': k,
+#                     'children': get_children_dict(v)
+#                 }
+#             elif isinstance(v, list):
+#                 node = {
+#                     'name': k,
+#                     'value': k,
+#                     'children': get_children_list(v)
+#                 }
+#             else:
+#                 node = {
+#                     'name': k + ": " + normalize_string(str(v)),
+#                     'value': normalize_string(str(v))
+#                 }
+#             temp_tree.append(node)
+
+#         if '@type' in el and el['@type'] == 'Recipe':
+#             recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
+#         else:
+#             recipe_tree += [{'name': 'json', 'children': temp_tree}]
+
+#     return recipe_json, recipe_tree, html_data, images
+
+
+# def get_from_html(soup):
+#     INVISIBLE_ELEMS = ('style', 'script', 'head', 'title')
+#     html = []
+#     for s in soup.strings:
+#         if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)):
+#             html.append(s)
+#     return html
+
+
+# def get_images_from_source(soup, url):
+#     sources = ['src', 'srcset', 'data-src']
+#     images = []
+#     img_tags = soup.find_all('img')
+#     if url:
+#         site = get_host_name(url)
+#         prot = url.split(':')[0]
+#     urls = []
+#     for img in img_tags:
+#         for src in sources:
+#             try:
+#                 urls.append(img[src])
+#             except KeyError:
+#                 pass
+#     for u in urls:
+#         u = u.split('?')[0]
+#         filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
+#         if filename:
+#             if (('http' not in u) and (url)):
+#                 # sometimes an image source can be relative
+#                 # if it is provide the base url
+#                 u = '{}://{}{}'.format(prot, site, u)
+#             if 'http' in u:
+#                 images.append(u)
+#     return images
+
+
+# def remove_graph(el):
+#     # recipes type might be wrapped in @graph type
+#     if isinstance(el, Tag):
+#         try:
+#             el = json.loads(el.string)
+#             if '@graph' in el:
+#                 for x in el['@graph']:
+#                     if '@type' in x and x['@type'] == 'Recipe':
+#                         el = x
+#         except (TypeError, JSONDecodeError):
+#             pass
+#     return el
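
The remove_graph() logic retired above (along with the rest of this module) unwraps schema.org @graph wrappers so only the Recipe node survives. A standalone sketch of that unwrap step; the JSON-LD payload is invented for illustration:

    import json

    # Many sites nest the Recipe object inside a top-level @graph array.
    payload = json.loads("""
    {
        "@context": "https://schema.org",
        "@graph": [
            {"@type": "WebSite", "name": "Example Site"},
            {"@type": "Recipe", "name": "Pancakes", "recipeIngredient": ["2 eggs", "1 cup flour"]}
        ]
    }
    """)

    el = payload
    if '@graph' in el:
        for x in el['@graph']:
            if '@type' in x and x['@type'] == 'Recipe':
                el = x  # keep only the Recipe node, as remove_graph() did

    print(el['name'])  # -> Pancakes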

View File

@@ -1,21 +1,19 @@
 import random
 import re
 from html import unescape
-from pytube import YouTube
 from unicodedata import decomposition
 from django.utils.dateparse import parse_duration
 from django.utils.translation import gettext as _
 from isodate import parse_duration as iso_parse_duration
 from isodate.isoerror import ISO8601Error
-from recipe_scrapers._utils import get_minutes
+from pytube import YouTube
+from recipe_scrapers._utils import get_host_name, get_minutes
 from cookbook.helper import recipe_url_import as helper
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.models import Keyword
 # from recipe_scrapers._utils import get_minutes ## temporary until/unless upstream incorporates get_minutes() PR
@@ -369,3 +367,32 @@ def iso_duration_to_minutes(string):
         string
     ).groupdict()
     return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)
+
+
+def get_images_from_soup(soup, url):
+    sources = ['src', 'srcset', 'data-src']
+    images = []
+    img_tags = soup.find_all('img')
+    if url:
+        site = get_host_name(url)
+        prot = url.split(':')[0]
+    urls = []
+    for img in img_tags:
+        for src in sources:
+            try:
+                urls.append(img[src])
+            except KeyError:
+                pass
+    for u in urls:
+        u = u.split('?')[0]
+        filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
+        if filename:
+            if (('http' not in u) and (url)):
+                # sometimes an image source can be relative
+                # if it is provide the base url
+                u = '{}://{}{}'.format(prot, site, u)
+            if 'http' in u:
+                images.append(u)
+    return images
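
The new get_images_from_soup() is a direct move of get_images_from_source() out of the now-retired recipe_html_import module, so callers no longer import the deprecated code. A usage sketch with invented inputs; it assumes a Django context where the cookbook package imports cleanly (e.g. a manage.py shell):

    from bs4 import BeautifulSoup

    from cookbook.helper.recipe_url_import import get_images_from_soup

    # One relative source and one absolute source with a query string.
    html = (
        '<img src="/media/pancakes.jpg">'
        '<img data-src="https://cdn.example.com/stack.png?w=800">'
    )
    soup = BeautifulSoup(html, "html.parser")

    # The url argument supplies the scheme and host used to absolutize relative
    # sources; query strings are stripped before the file-extension check.
    print(get_images_from_soup(soup, "https://example.com/recipe/1"))
    # -> ['https://example.com/media/pancakes.jpg', 'https://cdn.example.com/stack.png']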

View File

@@ -1,6 +1,7 @@
-from bs4 import BeautifulSoup
 from json import JSONDecodeError
-from recipe_scrapers import SCRAPERS
+from bs4 import BeautifulSoup
+from recipe_scrapers import SCRAPERS, get_host_name
 from recipe_scrapers._factory import SchemaScraperFactory
 from recipe_scrapers._schemaorg import SchemaOrg
@@ -15,22 +16,28 @@ SCRAPERS.update(CUSTOM_SCRAPERS)
 def text_scraper(text, url=None):
-    scraper_class = SchemaScraperFactory.SchemaScraper
+    domain = None
+    if url:
+        domain = get_host_name(url)
+    if domain in SCRAPERS:
+        scraper_class = SCRAPERS[domain]
+    else:
+        scraper_class = SchemaScraperFactory.SchemaScraper
     class TextScraper(scraper_class):
         def __init__(
             self,
-            page_data,
-            url=None
+            html=None,
+            url=None,
         ):
             self.wild_mode = False
             self.meta_http_equiv = False
-            self.soup = BeautifulSoup(page_data, "html.parser")
+            self.soup = BeautifulSoup(html, "html.parser")
             self.url = url
             self.recipe = None
             try:
-                self.schema = SchemaOrg(page_data)
+                self.schema = SchemaOrg(html)
             except (JSONDecodeError, AttributeError):
                 pass
-    return TextScraper(text, url)
+    return TextScraper(url=url, html=text)
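
text_scraper() now resolves a site-specific scraper class from the URL's host name when one is registered in SCRAPERS, falling back to the generic schema.org scraper otherwise, and html is passed as a keyword to match the upstream recipe_scrapers constructor. A minimal sketch with an invented ld+json payload, run inside the project (e.g. a manage.py shell):

    from cookbook.helper.scrapers.scrapers import text_scraper

    html = (
        "<script type='application/ld+json'>"
        '{"@context": "https://schema.org", "@type": "Recipe", "name": "Pancakes"}'
        "</script>"
    )

    # For an unregistered host this instantiates the generic SchemaScraper subclass.
    scrape = text_scraper(html, url="https://example.com/recipe/1")
    print(scrape.schema.data.get('name'))  # -> Pancakes, parsed by SchemaOrg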

View File

@@ -10,8 +10,8 @@ import validators
 import yaml
 from cookbook.helper.ingredient_parser import IngredientParser
-from cookbook.helper.recipe_html_import import get_recipe_from_source
-from cookbook.helper.recipe_url_import import iso_duration_to_minutes
+from cookbook.helper.recipe_url_import import get_images_from_soup, iso_duration_to_minutes
+from cookbook.helper.scrapers.scrapers import text_scraper
 from cookbook.integration.integration import Integration
 from cookbook.models import Ingredient, Keyword, Recipe, Step
@@ -24,7 +24,10 @@ class CookBookApp(Integration):
     def get_recipe_from_file(self, file):
         recipe_html = file.getvalue().decode("utf-8")
-        recipe_json, recipe_tree, html_data, images = get_recipe_from_source(recipe_html, 'CookBookApp', self.request)
+        # recipe_json, recipe_tree, html_data, images = get_recipe_from_source(recipe_html, 'CookBookApp', self.request)
+        scrape = text_scraper(text=data)
+        recipe_json = helper.get_from_scraper(scrape, request)
+        images = list(dict.fromkeys(get_images_from_soup(scrape.soup, url)))
         recipe = Recipe.objects.create(
             name=recipe_json['name'].strip(),

View File

@@ -3,10 +3,9 @@ from io import BytesIO
 from zipfile import ZipFile
 from bs4 import BeautifulSoup
 from django.utils.translation import gettext as _
 from cookbook.helper.ingredient_parser import IngredientParser
-from cookbook.helper.recipe_html_import import get_recipe_from_source
 from cookbook.helper.recipe_url_import import iso_duration_to_minutes, parse_servings
 from cookbook.integration.integration import Integration
 from cookbook.models import Ingredient, Keyword, Recipe, Step

View File

@@ -1,12 +1,11 @@
 import traceback
-from datetime import timedelta, datetime
+from datetime import datetime, timedelta
 from decimal import Decimal
 from gettext import gettext as _
 from html import escape
 from smtplib import SMTPException
-from PIL import Image
-from django.contrib.auth.models import User, Group
+from django.contrib.auth.models import Group, User
 from django.core.mail import send_mail
 from django.db.models import Avg, Q, QuerySet, Sum
 from django.http import BadHeaderError
@@ -14,6 +13,7 @@ from django.urls import reverse
 from django.utils import timezone
 from django_scopes import scopes_disabled
 from drf_writable_nested import UniqueFieldsMixin, WritableNestedModelSerializer
+from PIL import Image
 from rest_framework import serializers
 from rest_framework.exceptions import NotFound, ValidationError
@@ -22,14 +22,14 @@ from cookbook.helper.HelperFunctions import str2bool
 from cookbook.helper.permission_helper import above_space_limit
 from cookbook.helper.shopping_helper import RecipeShoppingEditor
 from cookbook.models import (Automation, BookmarkletImport, Comment, CookLog, CustomFilter,
-                             ExportLog, Food, FoodInheritField, ImportLog, Ingredient, Keyword,
-                             MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
+                             ExportLog, Food, FoodInheritField, ImportLog, Ingredient, InviteLink,
+                             Keyword, MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
                              RecipeBookEntry, RecipeImport, ShareLink, ShoppingList,
-                             ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
-                             SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
-                             UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
+                             ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
+                             Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
+                             SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
 from cookbook.templatetags.custom_tags import markdown
-from recipes.settings import MEDIA_URL, AWS_ENABLED
+from recipes.settings import AWS_ENABLED, MEDIA_URL
 class ExtendedRecipeMixin(serializers.ModelSerializer):
@@ -193,7 +193,8 @@ class SpaceSerializer(WritableNestedModelSerializer):
     class Meta:
         model = Space
-        fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
+        fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users',
+                  'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
         read_only_fields = ('id', 'created_by', 'created_at', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo',)
@@ -815,7 +816,7 @@ class RecipeBookEntrySerializer(serializers.ModelSerializer):
         book = validated_data['book']
         recipe = validated_data['recipe']
         if not book.get_owner() == self.context['request'].user and not self.context[
-            'request'].user in book.get_shared():
+                'request'].user in book.get_shared():
             raise NotFound(detail=None, code=None)
         obj, created = RecipeBookEntry.objects.get_or_create(book=book, recipe=recipe)
         return obj
@@ -871,11 +872,11 @@ class ShoppingListRecipeSerializer(serializers.ModelSerializer):
         value = value.quantize(
             Decimal(1)) if value == value.to_integral() else value.normalize()  # strips trailing zero
         return (
-            obj.name
-            or getattr(obj.mealplan, 'title', None)
-            or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
-            or obj.recipe.name
+                obj.name
+                or getattr(obj.mealplan, 'title', None)
+                or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
+                or obj.recipe.name
         ) + f' ({value:.2g})'
     def update(self, instance, validated_data):
         # TODO remove once old shopping list
@@ -1232,6 +1233,6 @@ class FoodShoppingUpdateSerializer(serializers.ModelSerializer):
 # non model serializers
 class RecipeFromSourceSerializer(serializers.Serializer):
-    url = serializers.CharField(max_length=4096, required=False, allow_null=True)
+    url = serializers.CharField(max_length=4096, required=False, allow_null=True, allow_blank=True)
     data = serializers.CharField(required=False, allow_null=True, allow_blank=True)
     bookmarklet = serializers.IntegerField(required=False, allow_null=True, )
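
The url field gaining allow_blank=True matters for the bookmarklet flow, which (presumably, given the rest of this fix) can post an empty url string alongside the captured HTML; previously such a payload failed validation. A sketch of the difference, with an invented payload and a Django context assumed:

    from cookbook.serializer import RecipeFromSourceSerializer

    s = RecipeFromSourceSerializer(data={'url': '', 'data': '<html>...</html>'})
    # With allow_blank=True this now validates; the view then decides between
    # the url and data branches instead of rejecting the request outright.
    print(s.is_valid())  # -> True (previously False: '' failed the blank check)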

View File

@@ -5,20 +5,20 @@ import re
 import traceback
 import uuid
 from collections import OrderedDict
+from json import JSONDecodeError
+from urllib.parse import unquote
 from zipfile import ZipFile
 import requests
 import validators
-from PIL import UnidentifiedImageError
 from annoying.decorators import ajax_request
 from annoying.functions import get_object_or_None
 from django.contrib import messages
-from django.contrib.auth.models import User, Group
+from django.contrib.auth.models import Group, User
 from django.contrib.postgres.search import TrigramSimilarity
 from django.core.exceptions import FieldError, ValidationError
 from django.core.files import File
-from django.db.models import (Case, Count, Exists, OuterRef, ProtectedError, Q,
-                              Subquery, Value, When)
+from django.db.models import Case, Count, Exists, OuterRef, ProtectedError, Q, Subquery, Value, When
 from django.db.models.fields.related import ForeignObjectRel
 from django.db.models.functions import Coalesce, Lower
 from django.http import FileResponse, HttpResponse, JsonResponse
@@ -27,6 +27,9 @@ from django.urls import reverse
 from django.utils.translation import gettext as _
 from django_scopes import scopes_disabled
 from icalendar import Calendar, Event
+from PIL import UnidentifiedImageError
+from recipe_scrapers import scrape_html, scrape_me
+from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
 from requests.exceptions import MissingSchema
 from rest_framework import decorators, status, viewsets
 from rest_framework.authtoken.models import Token
@@ -41,43 +44,47 @@ from rest_framework.throttling import AnonRateThrottle
 from rest_framework.viewsets import ViewSetMixin
 from treebeard.exceptions import InvalidMoveToDescendant, InvalidPosition, PathOverflow
+from cookbook.helper import recipe_url_import as helper
 from cookbook.helper.HelperFunctions import str2bool
 from cookbook.helper.image_processing import handle_image
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.helper.permission_helper import (CustomIsAdmin, CustomIsGuest, CustomIsOwner,
-                                               CustomIsShare, CustomIsShared, CustomIsUser,
-                                               group_required, CustomIsSpaceOwner, switch_user_active_space, is_space_owner, CustomIsOwnerReadOnly)
-from cookbook.helper.recipe_html_import import get_recipe_from_source
+                                               CustomIsOwnerReadOnly, CustomIsShare, CustomIsShared,
+                                               CustomIsSpaceOwner, CustomIsUser, group_required,
+                                               is_space_owner, switch_user_active_space)
 from cookbook.helper.recipe_search import RecipeFacet, RecipeSearch, old_search
-from cookbook.helper.recipe_url_import import get_from_youtube_scraper
+from cookbook.helper.recipe_url_import import get_from_youtube_scraper, get_images_from_soup
+from cookbook.helper.scrapers.scrapers import text_scraper
 from cookbook.helper.shopping_helper import RecipeShoppingEditor, shopping_helper
 from cookbook.models import (Automation, BookmarkletImport, CookLog, CustomFilter, ExportLog, Food,
-                             FoodInheritField, ImportLog, Ingredient, Keyword, MealPlan, MealType,
-                             Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
-                             ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
-                             SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
-                             UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
+                             FoodInheritField, ImportLog, Ingredient, InviteLink, Keyword, MealPlan,
+                             MealType, Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
+                             ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
+                             Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
+                             SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
 from cookbook.provider.dropbox import Dropbox
 from cookbook.provider.local import Local
 from cookbook.provider.nextcloud import Nextcloud
 from cookbook.schemas import FilterSchema, QueryParam, QueryParamAutoSchema, TreeSchema
-from cookbook.serializer import (AutomationSerializer, BookmarkletImportSerializer,
-                                 CookLogSerializer, CustomFilterSerializer, ExportLogSerializer,
+from cookbook.serializer import (AutomationSerializer, BookmarkletImportListSerializer,
+                                 BookmarkletImportSerializer, CookLogSerializer,
+                                 CustomFilterSerializer, ExportLogSerializer,
                                  FoodInheritFieldSerializer, FoodSerializer,
-                                 FoodShoppingUpdateSerializer, ImportLogSerializer,
-                                 IngredientSerializer, KeywordSerializer, MealPlanSerializer,
+                                 FoodShoppingUpdateSerializer, GroupSerializer, ImportLogSerializer,
+                                 IngredientSerializer, IngredientSimpleSerializer,
+                                 InviteLinkSerializer, KeywordSerializer, MealPlanSerializer,
                                  MealTypeSerializer, RecipeBookEntrySerializer,
-                                 RecipeBookSerializer, RecipeImageSerializer,
-                                 RecipeOverviewSerializer, RecipeSerializer,
+                                 RecipeBookSerializer, RecipeFromSourceSerializer,
+                                 RecipeImageSerializer, RecipeOverviewSerializer, RecipeSerializer,
                                  RecipeShoppingUpdateSerializer, RecipeSimpleSerializer,
                                  ShoppingListAutoSyncSerializer, ShoppingListEntrySerializer,
                                  ShoppingListRecipeSerializer, ShoppingListSerializer,
-                                 StepSerializer, StorageSerializer, SpaceSerializer,
+                                 SpaceSerializer, StepSerializer, StorageSerializer,
                                  SupermarketCategoryRelationSerializer,
                                  SupermarketCategorySerializer, SupermarketSerializer,
                                  SyncLogSerializer, SyncSerializer, UnitSerializer,
                                  UserFileSerializer, UserNameSerializer, UserPreferenceSerializer,
-                                 ViewLogSerializer, IngredientSimpleSerializer, BookmarkletImportListSerializer, RecipeFromSourceSerializer, SpaceSerializer, UserSpaceSerializer, GroupSerializer, InviteLinkSerializer)
+                                 UserSpaceSerializer, ViewLogSerializer)
 from recipes import settings
@@ -713,7 +720,7 @@ class RecipeViewSet(viewsets.ModelViewSet):
                            'Query string matched (fuzzy) against recipe name. In the future also fulltext search.')),
               QueryParam(name='keywords', description=_(
                   'ID of keyword a recipe should have. For multiple repeat parameter. Equivalent to keywords_or'),
-                  qtype='int'),
+                         qtype='int'),
               QueryParam(name='keywords_or',
                          description=_('Keyword IDs, repeat for multiple. Return recipes with any of the keywords'),
                          qtype='int'),
@@ -1114,69 +1121,79 @@ def recipe_from_source(request):
     - url: url to use for importing recipe
     - data: if no url is given recipe is imported from provided source data
     - (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
-    :return: JsonResponse containing the parsed json, original html,json and images
+    :return: JsonResponse containing the parsed json and images
     """
+    scrape = None
     serializer = RecipeFromSourceSerializer(data=request.data)
     if serializer.is_valid():
-        try:
-            if bookmarklet := BookmarkletImport.objects.filter(pk=serializer.validated_data['bookmarklet']).first():
-                serializer.validated_data['url'] = bookmarklet.url
-                serializer.validated_data['data'] = bookmarklet.html
-                bookmarklet.delete()
-        except KeyError:
-            pass
-
-        # headers to use for request to external sites
-        external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}
-
-        if not 'url' in serializer.validated_data and not 'data' in serializer.validated_data:
+        if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
+            serializer.validated_data['url'] = bookmarklet.url
+            serializer.validated_data['data'] = bookmarklet.html
+            bookmarklet.delete()
+
+        url = serializer.validated_data.get('url', None)
+        data = unquote(serializer.validated_data.get('data', None))
+        if not url and not data:
             return Response({
                 'error': True,
                 'msg': _('Nothing to do.')
             }, status=status.HTTP_400_BAD_REQUEST)
-
-        # in manual mode request complete page to return it later
-        if 'url' in serializer.validated_data:
-            if re.match('^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', serializer.validated_data['url']):
-                if validators.url(serializer.validated_data['url'], public=True):
+        elif url and not data:
+            if re.match('^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', url):
+                if validators.url(url, public=True):
                     return Response({
-                        'recipe_json': get_from_youtube_scraper(serializer.validated_data['url'], request),
-                        'recipe_tree': '',
-                        'recipe_html': '',
+                        'recipe_json': get_from_youtube_scraper(url, request),
+                        # 'recipe_tree': '',
+                        # 'recipe_html': '',
                         'recipe_images': [],
                     }, status=status.HTTP_200_OK)
-            try:
-                if validators.url(serializer.validated_data['url'], public=True):
-                    serializer.validated_data['data'] = requests.get(serializer.validated_data['url'], headers=external_request_headers).content
-                else:
-                    return Response({
-                        'error': True,
-                        'msg': _('Invalid Url')
-                    }, status=status.HTTP_400_BAD_REQUEST)
-            except requests.exceptions.ConnectionError:
-                return Response({
-                    'error': True,
-                    'msg': _('Connection Refused.')
-                }, status=status.HTTP_400_BAD_REQUEST)
-            except requests.exceptions.MissingSchema:
-                return Response({
-                    'error': True,
-                    'msg': _('Bad URL Schema.')
-                }, status=status.HTTP_400_BAD_REQUEST)
-
-        recipe_json, recipe_tree, recipe_html, recipe_images = get_recipe_from_source(serializer.validated_data['data'], serializer.validated_data['url'], request)
-        if len(recipe_tree) == 0 and len(recipe_json) == 0:
-            return Response({
-                'error': True,
-                'msg': _('No usable data could be found.')
-            }, status=status.HTTP_400_BAD_REQUEST)
-        else:
-            return Response({
-                'recipe_json': recipe_json,
-                'recipe_tree': recipe_tree,
-                'recipe_html': recipe_html,
-                'recipe_images': list(dict.fromkeys(recipe_images)),
-            }, status=status.HTTP_200_OK)
+            else:
+                try:
+                    if validators.url(url, public=True):
+                        scrape = scrape_me(url_path=url, wild_mode=True)
+                    else:
+                        return Response({
+                            'error': True,
+                            'msg': _('Invalid Url')
+                        }, status=status.HTTP_400_BAD_REQUEST)
+                except NoSchemaFoundInWildMode:
+                    pass
+                except requests.exceptions.ConnectionError:
+                    return Response({
+                        'error': True,
+                        'msg': _('Connection Refused.')
+                    }, status=status.HTTP_400_BAD_REQUEST)
+                except requests.exceptions.MissingSchema:
+                    return Response({
+                        'error': True,
+                        'msg': _('Bad URL Schema.')
+                    }, status=status.HTTP_400_BAD_REQUEST)
+        else:
+            try:
+                json.loads(data)
+                data = "<script type='application/ld+json'>" + data + "</script>"
+            except JSONDecodeError:
+                pass
+            scrape = text_scraper(text=data, url=url)
+            if not url and (found_url := scrape.schema.data.get('url', None)):
+                scrape = text_scraper(text=data, url=found_url)
+
+        if scrape:
+            return Response({
+                'recipe_json': helper.get_from_scraper(scrape, request),
+                # 'recipe_tree': recipe_tree,
+                # 'recipe_html': recipe_html,
+                'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, url))),
+            }, status=status.HTTP_200_OK)
+        else:
+            return Response({
+                'error': True,
+                'msg': _('No usable data could be found.')
+            }, status=status.HTTP_400_BAD_REQUEST)
     else:
         return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
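
End to end, recipe_from_source now returns only recipe_json and recipe_images; recipe_tree and recipe_html are gone from the payload. A client-side sketch follows; the endpoint path and token are assumptions for illustration, not taken from this diff:

    import requests

    API = 'https://tandoor.example.com/api/recipe-from-source/'  # hypothetical path
    HEADERS = {'Authorization': 'Bearer <token>'}  # hypothetical auth

    # url only: the server scrapes the page itself via scrape_me(..., wild_mode=True).
    r = requests.post(API, json={'url': 'https://example.com/recipe/1'}, headers=HEADERS)

    # data only (the bookmarklet path): raw HTML or a bare ld+json string is accepted;
    # bare JSON is wrapped in a <script type='application/ld+json'> tag before scraping.
    r = requests.post(API, json={'data': '{"@type": "Recipe", "name": "Pancakes"}'}, headers=HEADERS)
    print(r.json().get('recipe_json', {}).get('name'))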

View File

@@ -461,8 +461,8 @@ export default {
             recent_urls: [],
             source_data: '',
             recipe_json: undefined,
-            recipe_html: undefined,
-            recipe_tree: undefined,
+            // recipe_html: undefined,
+            // recipe_tree: undefined,
             recipe_images: [],
             imported_recipes: [],
             failed_imports: [],
@@ -593,9 +593,9 @@ export default {
             }
             // reset all variables
-            this.recipe_html = undefined
+            // this.recipe_html = undefined
             this.recipe_json = undefined
-            this.recipe_tree = undefined
+            // this.recipe_tree = undefined
             this.recipe_images = []
             // load recipe
@@ -621,8 +621,8 @@ export default {
                 return x
             })
-            this.recipe_tree = response.data['recipe_tree'];
-            this.recipe_html = response.data['recipe_html'];
+            // this.recipe_tree = response.data['recipe_tree'];
+            // this.recipe_html = response.data['recipe_html'];
             this.recipe_images = response.data['recipe_images'] !== undefined ? response.data['recipe_images'] : [];
             if (!silent) {