diff --git a/cookbook/helper/recipe_html_import.py b/cookbook/helper/recipe_html_import.py
index 1b5d37ad2..95f115b76 100644
--- a/cookbook/helper/recipe_html_import.py
+++ b/cookbook/helper/recipe_html_import.py
@@ -1,189 +1,191 @@
-import json
-import re
-from json import JSONDecodeError
-from urllib.parse import unquote
+# import json
+# import re
+# from json import JSONDecodeError
+# from urllib.parse import unquote
-from bs4 import BeautifulSoup
-from bs4.element import Tag
-from recipe_scrapers import scrape_html, scrape_me
-from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
-from recipe_scrapers._utils import get_host_name, normalize_string
+# from bs4 import BeautifulSoup
+# from bs4.element import Tag
+# from recipe_scrapers import scrape_html, scrape_me
+# from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
+# from recipe_scrapers._utils import get_host_name, normalize_string
-from cookbook.helper import recipe_url_import as helper
-from cookbook.helper.scrapers.scrapers import text_scraper
+# from cookbook.helper import recipe_url_import as helper
+# from cookbook.helper.scrapers.scrapers import text_scraper
-def get_recipe_from_source(text, url, request):
- def build_node(k, v):
- if isinstance(v, dict):
- node = {
- 'name': k,
- 'value': k,
- 'children': get_children_dict(v)
- }
- elif isinstance(v, list):
- node = {
- 'name': k,
- 'value': k,
- 'children': get_children_list(v)
- }
- else:
- node = {
- 'name': k + ": " + normalize_string(str(v)),
- 'value': normalize_string(str(v))
- }
- return node
+# def get_recipe_from_source(text, url, request):
+# def build_node(k, v):
+# if isinstance(v, dict):
+# node = {
+# 'name': k,
+# 'value': k,
+# 'children': get_children_dict(v)
+# }
+# elif isinstance(v, list):
+# node = {
+# 'name': k,
+# 'value': k,
+# 'children': get_children_list(v)
+# }
+# else:
+# node = {
+# 'name': k + ": " + normalize_string(str(v)),
+# 'value': normalize_string(str(v))
+# }
+# return node
- def get_children_dict(children):
- kid_list = []
- for k, v in children.items():
- kid_list.append(build_node(k, v))
- return kid_list
+# def get_children_dict(children):
+# kid_list = []
+# for k, v in children.items():
+# kid_list.append(build_node(k, v))
+# return kid_list
- def get_children_list(children):
- kid_list = []
- for kid in children:
- if type(kid) == list:
- node = {
- 'name': "unknown list",
- 'value': "unknown list",
- 'children': get_children_list(kid)
- }
- kid_list.append(node)
- elif type(kid) == dict:
- for k, v in kid.items():
- kid_list.append(build_node(k, v))
- else:
- kid_list.append({
- 'name': normalize_string(str(kid)),
- 'value': normalize_string(str(kid))
- })
- return kid_list
+# def get_children_list(children):
+# kid_list = []
+# for kid in children:
+# if type(kid) == list:
+# node = {
+# 'name': "unknown list",
+# 'value': "unknown list",
+# 'children': get_children_list(kid)
+# }
+# kid_list.append(node)
+# elif type(kid) == dict:
+# for k, v in kid.items():
+# kid_list.append(build_node(k, v))
+# else:
+# kid_list.append({
+# 'name': normalize_string(str(kid)),
+# 'value': normalize_string(str(kid))
+# })
+# return kid_list
- recipe_tree = []
- parse_list = []
- soup = BeautifulSoup(text, "html.parser")
- html_data = get_from_html(soup)
- images = get_images_from_source(soup, url)
- text = unquote(text)
- scrape = None
+# recipe_tree = []
+# parse_list = []
+# soup = BeautifulSoup(text, "html.parser")
+# html_data = get_from_html(soup)
+# images = get_images_from_source(soup, url)
+# text = unquote(text)
+# scrape = None
- if url:
- try:
- scrape = scrape_me(url_path=url, wild_mode=True)
- except(NoSchemaFoundInWildMode):
- pass
- if not scrape:
- try:
- parse_list.append(remove_graph(json.loads(text)))
- if not url and 'url' in parse_list[0]:
- url = parse_list[0]['url']
- scrape = text_scraper("", url=url)
+# if url and not text:
+# try:
+# scrape = scrape_me(url_path=url, wild_mode=True)
+# except(NoSchemaFoundInWildMode):
+# pass
- except JSONDecodeError:
- for el in soup.find_all('script', type='application/ld+json'):
- el = remove_graph(el)
- if not url and 'url' in el:
- url = el['url']
- if type(el) == list:
- for le in el:
- parse_list.append(le)
- elif type(el) == dict:
- parse_list.append(el)
- for el in soup.find_all(type='application/json'):
- el = remove_graph(el)
- if type(el) == list:
- for le in el:
- parse_list.append(le)
- elif type(el) == dict:
- parse_list.append(el)
- scrape = text_scraper(text, url=url)
+# if not scrape:
+# try:
+# parse_list.append(remove_graph(json.loads(text)))
+# if not url and 'url' in parse_list[0]:
+# url = parse_list[0]['url']
+# scrape = text_scraper("", url=url)
- recipe_json = helper.get_from_scraper(scrape, request)
+# except JSONDecodeError:
+# for el in soup.find_all('script', type='application/ld+json'):
+# el = remove_graph(el)
+# if not url and 'url' in el:
+# url = el['url']
+# if type(el) == list:
+# for le in el:
+# parse_list.append(le)
+# elif type(el) == dict:
+# parse_list.append(el)
+# for el in soup.find_all(type='application/json'):
+# el = remove_graph(el)
+# if type(el) == list:
+# for le in el:
+# parse_list.append(le)
+# elif type(el) == dict:
+# parse_list.append(el)
+# scrape = text_scraper(text, url=url)
- for el in parse_list:
- temp_tree = []
- if isinstance(el, Tag):
- try:
- el = json.loads(el.string)
- except TypeError:
- continue
+# recipe_json = helper.get_from_scraper(scrape, request)
- for k, v in el.items():
- if isinstance(v, dict):
- node = {
- 'name': k,
- 'value': k,
- 'children': get_children_dict(v)
- }
- elif isinstance(v, list):
- node = {
- 'name': k,
- 'value': k,
- 'children': get_children_list(v)
- }
- else:
- node = {
- 'name': k + ": " + normalize_string(str(v)),
- 'value': normalize_string(str(v))
- }
- temp_tree.append(node)
+# # TODO: DEPRECATE recipe_tree & html_data. first validate it isn't used anywhere
+# for el in parse_list:
+# temp_tree = []
+# if isinstance(el, Tag):
+# try:
+# el = json.loads(el.string)
+# except TypeError:
+# continue
- if '@type' in el and el['@type'] == 'Recipe':
- recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
- else:
- recipe_tree += [{'name': 'json', 'children': temp_tree}]
+# for k, v in el.items():
+# if isinstance(v, dict):
+# node = {
+# 'name': k,
+# 'value': k,
+# 'children': get_children_dict(v)
+# }
+# elif isinstance(v, list):
+# node = {
+# 'name': k,
+# 'value': k,
+# 'children': get_children_list(v)
+# }
+# else:
+# node = {
+# 'name': k + ": " + normalize_string(str(v)),
+# 'value': normalize_string(str(v))
+# }
+# temp_tree.append(node)
- return recipe_json, recipe_tree, html_data, images
+# if '@type' in el and el['@type'] == 'Recipe':
+# recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
+# else:
+# recipe_tree += [{'name': 'json', 'children': temp_tree}]
+
+# return recipe_json, recipe_tree, html_data, images
-def get_from_html(soup):
- INVISIBLE_ELEMS = ('style', 'script', 'head', 'title')
- html = []
- for s in soup.strings:
- if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)):
- html.append(s)
- return html
+# def get_from_html(soup):
+# INVISIBLE_ELEMS = ('style', 'script', 'head', 'title')
+# html = []
+# for s in soup.strings:
+# if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)):
+# html.append(s)
+# return html
-def get_images_from_source(soup, url):
- sources = ['src', 'srcset', 'data-src']
- images = []
- img_tags = soup.find_all('img')
- if url:
- site = get_host_name(url)
- prot = url.split(':')[0]
+# def get_images_from_source(soup, url):
+# sources = ['src', 'srcset', 'data-src']
+# images = []
+# img_tags = soup.find_all('img')
+# if url:
+# site = get_host_name(url)
+# prot = url.split(':')[0]
- urls = []
- for img in img_tags:
- for src in sources:
- try:
- urls.append(img[src])
- except KeyError:
- pass
+# urls = []
+# for img in img_tags:
+# for src in sources:
+# try:
+# urls.append(img[src])
+# except KeyError:
+# pass
- for u in urls:
- u = u.split('?')[0]
- filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
- if filename:
- if (('http' not in u) and (url)):
- # sometimes an image source can be relative
- # if it is provide the base url
- u = '{}://{}{}'.format(prot, site, u)
- if 'http' in u:
- images.append(u)
- return images
+# for u in urls:
+# u = u.split('?')[0]
+# filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
+# if filename:
+# if (('http' not in u) and (url)):
+# # sometimes an image source can be relative
+# # if it is provide the base url
+# u = '{}://{}{}'.format(prot, site, u)
+# if 'http' in u:
+# images.append(u)
+# return images
-def remove_graph(el):
- # recipes type might be wrapped in @graph type
- if isinstance(el, Tag):
- try:
- el = json.loads(el.string)
- if '@graph' in el:
- for x in el['@graph']:
- if '@type' in x and x['@type'] == 'Recipe':
- el = x
- except (TypeError, JSONDecodeError):
- pass
- return el
+# def remove_graph(el):
+# # recipes type might be wrapped in @graph type
+# if isinstance(el, Tag):
+# try:
+# el = json.loads(el.string)
+# if '@graph' in el:
+# for x in el['@graph']:
+# if '@type' in x and x['@type'] == 'Recipe':
+# el = x
+# except (TypeError, JSONDecodeError):
+# pass
+# return el
diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py
index aa3cc5cff..cec57e729 100644
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@@ -1,21 +1,19 @@
import random
import re
from html import unescape
-
-from pytube import YouTube
from unicodedata import decomposition
from django.utils.dateparse import parse_duration
from django.utils.translation import gettext as _
from isodate import parse_duration as iso_parse_duration
from isodate.isoerror import ISO8601Error
-from recipe_scrapers._utils import get_minutes
+from pytube import YouTube
+from recipe_scrapers._utils import get_host_name, get_minutes
from cookbook.helper import recipe_url_import as helper
from cookbook.helper.ingredient_parser import IngredientParser
from cookbook.models import Keyword
-
# from recipe_scrapers._utils import get_minutes ## temporary until/unless upstream incorporates get_minutes() PR
@@ -369,3 +367,32 @@ def iso_duration_to_minutes(string):
string
).groupdict()
return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)
+
+
+def get_images_from_soup(soup, url):
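+    # attributes that commonly carry an image url, including lazy-loading variants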
+ sources = ['src', 'srcset', 'data-src']
+ images = []
+ img_tags = soup.find_all('img')
+ if url:
+ site = get_host_name(url)
+ prot = url.split(':')[0]
+
+ urls = []
+ for img in img_tags:
+ for src in sources:
+ try:
+ urls.append(img[src])
+ except KeyError:
+ pass
+
+ for u in urls:
+ u = u.split('?')[0]
+ filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
+ if filename:
+ if (('http' not in u) and (url)):
+                # sometimes an image source can be relative;
+                # if it is, prepend the protocol and host of the source page
+ u = '{}://{}{}'.format(prot, site, u)
+ if 'http' in u:
+ images.append(u)
+ return images
diff --git a/cookbook/helper/scrapers/scrapers.py b/cookbook/helper/scrapers/scrapers.py
index eb93cc2c2..7d6c08b15 100644
--- a/cookbook/helper/scrapers/scrapers.py
+++ b/cookbook/helper/scrapers/scrapers.py
@@ -1,6 +1,7 @@
-from bs4 import BeautifulSoup
from json import JSONDecodeError
-from recipe_scrapers import SCRAPERS
+
+from bs4 import BeautifulSoup
+from recipe_scrapers import SCRAPERS, get_host_name
from recipe_scrapers._factory import SchemaScraperFactory
from recipe_scrapers._schemaorg import SchemaOrg
@@ -15,22 +16,28 @@ SCRAPERS.update(CUSTOM_SCRAPERS)
def text_scraper(text, url=None):
- scraper_class = SchemaScraperFactory.SchemaScraper
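+    # use a site-specific scraper when the url's host is known to
+    # recipe_scrapers, otherwise fall back to the generic schema.org scraper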
+ domain = None
+ if url:
+ domain = get_host_name(url)
+ if domain in SCRAPERS:
+ scraper_class = SCRAPERS[domain]
+ else:
+ scraper_class = SchemaScraperFactory.SchemaScraper
class TextScraper(scraper_class):
def __init__(
self,
- page_data,
- url=None
+ html=None,
+ url=None,
):
self.wild_mode = False
self.meta_http_equiv = False
- self.soup = BeautifulSoup(page_data, "html.parser")
+ self.soup = BeautifulSoup(html, "html.parser")
self.url = url
self.recipe = None
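+            self.schema = None  # keep the attribute defined even when SchemaOrg parsing fails below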
try:
- self.schema = SchemaOrg(page_data)
+ self.schema = SchemaOrg(html)
except (JSONDecodeError, AttributeError):
pass
- return TextScraper(text, url)
+ return TextScraper(url=url, html=text)
diff --git a/cookbook/integration/cookbookapp.py b/cookbook/integration/cookbookapp.py
index f22e9d45d..7ff50ab62 100644
--- a/cookbook/integration/cookbookapp.py
+++ b/cookbook/integration/cookbookapp.py
@@ -10,8 +10,8 @@ import validators
import yaml
from cookbook.helper.ingredient_parser import IngredientParser
-from cookbook.helper.recipe_html_import import get_recipe_from_source
-from cookbook.helper.recipe_url_import import iso_duration_to_minutes
+from cookbook.helper.recipe_url_import import get_from_scraper, get_images_from_soup, iso_duration_to_minutes
+from cookbook.helper.scrapers.scrapers import text_scraper
from cookbook.integration.integration import Integration
from cookbook.models import Ingredient, Keyword, Recipe, Step
@@ -24,7 +24,10 @@ class CookBookApp(Integration):
def get_recipe_from_file(self, file):
recipe_html = file.getvalue().decode("utf-8")
- recipe_json, recipe_tree, html_data, images = get_recipe_from_source(recipe_html, 'CookBookApp', self.request)
+ # recipe_json, recipe_tree, html_data, images = get_recipe_from_source(recipe_html, 'CookBookApp', self.request)
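+        # parse the exported html with the generic text scraper now that
+        # get_recipe_from_source has been retired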
+        scrape = text_scraper(text=recipe_html)
+        recipe_json = get_from_scraper(scrape, self.request)
+        images = list(dict.fromkeys(get_images_from_soup(scrape.soup, None)))
recipe = Recipe.objects.create(
name=recipe_json['name'].strip(),
diff --git a/cookbook/integration/copymethat.py b/cookbook/integration/copymethat.py
index 7a2a532f9..2a9c56521 100644
--- a/cookbook/integration/copymethat.py
+++ b/cookbook/integration/copymethat.py
@@ -3,10 +3,9 @@ from io import BytesIO
from zipfile import ZipFile
from bs4 import BeautifulSoup
-
from django.utils.translation import gettext as _
+
from cookbook.helper.ingredient_parser import IngredientParser
-from cookbook.helper.recipe_html_import import get_recipe_from_source
from cookbook.helper.recipe_url_import import iso_duration_to_minutes, parse_servings
from cookbook.integration.integration import Integration
from cookbook.models import Ingredient, Keyword, Recipe, Step
diff --git a/cookbook/serializer.py b/cookbook/serializer.py
index 1e386c5dd..a4b8b2fe9 100644
--- a/cookbook/serializer.py
+++ b/cookbook/serializer.py
@@ -1,12 +1,11 @@
import traceback
-from datetime import timedelta, datetime
+from datetime import datetime, timedelta
from decimal import Decimal
from gettext import gettext as _
from html import escape
from smtplib import SMTPException
-from PIL import Image
-from django.contrib.auth.models import User, Group
+from django.contrib.auth.models import Group, User
from django.core.mail import send_mail
from django.db.models import Avg, Q, QuerySet, Sum
from django.http import BadHeaderError
@@ -14,6 +13,7 @@ from django.urls import reverse
from django.utils import timezone
from django_scopes import scopes_disabled
from drf_writable_nested import UniqueFieldsMixin, WritableNestedModelSerializer
+from PIL import Image
from rest_framework import serializers
from rest_framework.exceptions import NotFound, ValidationError
@@ -22,14 +22,14 @@ from cookbook.helper.HelperFunctions import str2bool
from cookbook.helper.permission_helper import above_space_limit
from cookbook.helper.shopping_helper import RecipeShoppingEditor
from cookbook.models import (Automation, BookmarkletImport, Comment, CookLog, CustomFilter,
- ExportLog, Food, FoodInheritField, ImportLog, Ingredient, Keyword,
- MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
+ ExportLog, Food, FoodInheritField, ImportLog, Ingredient, InviteLink,
+ Keyword, MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
RecipeBookEntry, RecipeImport, ShareLink, ShoppingList,
- ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
- SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
- UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
+ ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
+ Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
+ SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
from cookbook.templatetags.custom_tags import markdown
-from recipes.settings import MEDIA_URL, AWS_ENABLED
+from recipes.settings import AWS_ENABLED, MEDIA_URL
class ExtendedRecipeMixin(serializers.ModelSerializer):
@@ -193,7 +193,8 @@ class SpaceSerializer(WritableNestedModelSerializer):
class Meta:
model = Space
- fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
+ fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users',
+ 'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
read_only_fields = ('id', 'created_by', 'created_at', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo',)
@@ -815,7 +816,7 @@ class RecipeBookEntrySerializer(serializers.ModelSerializer):
book = validated_data['book']
recipe = validated_data['recipe']
if not book.get_owner() == self.context['request'].user and not self.context[
- 'request'].user in book.get_shared():
+ 'request'].user in book.get_shared():
raise NotFound(detail=None, code=None)
obj, created = RecipeBookEntry.objects.get_or_create(book=book, recipe=recipe)
return obj
@@ -871,11 +872,11 @@ class ShoppingListRecipeSerializer(serializers.ModelSerializer):
value = value.quantize(
Decimal(1)) if value == value.to_integral() else value.normalize() # strips trailing zero
return (
- obj.name
- or getattr(obj.mealplan, 'title', None)
- or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
- or obj.recipe.name
- ) + f' ({value:.2g})'
+ obj.name
+ or getattr(obj.mealplan, 'title', None)
+ or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
+ or obj.recipe.name
+ ) + f' ({value:.2g})'
def update(self, instance, validated_data):
# TODO remove once old shopping list
@@ -1232,6 +1233,6 @@ class FoodShoppingUpdateSerializer(serializers.ModelSerializer):
# non model serializers
class RecipeFromSourceSerializer(serializers.Serializer):
- url = serializers.CharField(max_length=4096, required=False, allow_null=True)
+ url = serializers.CharField(max_length=4096, required=False, allow_null=True, allow_blank=True)
data = serializers.CharField(required=False, allow_null=True, allow_blank=True)
bookmarklet = serializers.IntegerField(required=False, allow_null=True, )
diff --git a/cookbook/views/api.py b/cookbook/views/api.py
index 7a49261be..54df51bfb 100644
--- a/cookbook/views/api.py
+++ b/cookbook/views/api.py
@@ -5,20 +5,20 @@ import re
import traceback
import uuid
from collections import OrderedDict
+from json import JSONDecodeError
+from urllib.parse import unquote
from zipfile import ZipFile
import requests
import validators
-from PIL import UnidentifiedImageError
from annoying.decorators import ajax_request
from annoying.functions import get_object_or_None
from django.contrib import messages
-from django.contrib.auth.models import User, Group
+from django.contrib.auth.models import Group, User
from django.contrib.postgres.search import TrigramSimilarity
from django.core.exceptions import FieldError, ValidationError
from django.core.files import File
-from django.db.models import (Case, Count, Exists, OuterRef, ProtectedError, Q,
- Subquery, Value, When)
+from django.db.models import Case, Count, Exists, OuterRef, ProtectedError, Q, Subquery, Value, When
from django.db.models.fields.related import ForeignObjectRel
from django.db.models.functions import Coalesce, Lower
from django.http import FileResponse, HttpResponse, JsonResponse
@@ -27,6 +27,9 @@ from django.urls import reverse
from django.utils.translation import gettext as _
from django_scopes import scopes_disabled
from icalendar import Calendar, Event
+from PIL import UnidentifiedImageError
+from recipe_scrapers import scrape_html, scrape_me
+from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
from requests.exceptions import MissingSchema
from rest_framework import decorators, status, viewsets
from rest_framework.authtoken.models import Token
@@ -41,43 +44,47 @@ from rest_framework.throttling import AnonRateThrottle
from rest_framework.viewsets import ViewSetMixin
from treebeard.exceptions import InvalidMoveToDescendant, InvalidPosition, PathOverflow
+from cookbook.helper import recipe_url_import as helper
from cookbook.helper.HelperFunctions import str2bool
from cookbook.helper.image_processing import handle_image
from cookbook.helper.ingredient_parser import IngredientParser
from cookbook.helper.permission_helper import (CustomIsAdmin, CustomIsGuest, CustomIsOwner,
- CustomIsShare, CustomIsShared, CustomIsUser,
- group_required, CustomIsSpaceOwner, switch_user_active_space, is_space_owner, CustomIsOwnerReadOnly)
-from cookbook.helper.recipe_html_import import get_recipe_from_source
+ CustomIsOwnerReadOnly, CustomIsShare, CustomIsShared,
+ CustomIsSpaceOwner, CustomIsUser, group_required,
+ is_space_owner, switch_user_active_space)
from cookbook.helper.recipe_search import RecipeFacet, RecipeSearch, old_search
-from cookbook.helper.recipe_url_import import get_from_youtube_scraper
+from cookbook.helper.recipe_url_import import get_from_youtube_scraper, get_images_from_soup
+from cookbook.helper.scrapers.scrapers import text_scraper
from cookbook.helper.shopping_helper import RecipeShoppingEditor, shopping_helper
from cookbook.models import (Automation, BookmarkletImport, CookLog, CustomFilter, ExportLog, Food,
- FoodInheritField, ImportLog, Ingredient, Keyword, MealPlan, MealType,
- Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
- ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
- SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
- UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
+ FoodInheritField, ImportLog, Ingredient, InviteLink, Keyword, MealPlan,
+ MealType, Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
+ ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
+ Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
+ SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
from cookbook.provider.dropbox import Dropbox
from cookbook.provider.local import Local
from cookbook.provider.nextcloud import Nextcloud
from cookbook.schemas import FilterSchema, QueryParam, QueryParamAutoSchema, TreeSchema
-from cookbook.serializer import (AutomationSerializer, BookmarkletImportSerializer,
- CookLogSerializer, CustomFilterSerializer, ExportLogSerializer,
+from cookbook.serializer import (AutomationSerializer, BookmarkletImportListSerializer,
+ BookmarkletImportSerializer, CookLogSerializer,
+ CustomFilterSerializer, ExportLogSerializer,
FoodInheritFieldSerializer, FoodSerializer,
- FoodShoppingUpdateSerializer, ImportLogSerializer,
- IngredientSerializer, KeywordSerializer, MealPlanSerializer,
+ FoodShoppingUpdateSerializer, GroupSerializer, ImportLogSerializer,
+ IngredientSerializer, IngredientSimpleSerializer,
+ InviteLinkSerializer, KeywordSerializer, MealPlanSerializer,
MealTypeSerializer, RecipeBookEntrySerializer,
- RecipeBookSerializer, RecipeImageSerializer,
- RecipeOverviewSerializer, RecipeSerializer,
+ RecipeBookSerializer, RecipeFromSourceSerializer,
+ RecipeImageSerializer, RecipeOverviewSerializer, RecipeSerializer,
RecipeShoppingUpdateSerializer, RecipeSimpleSerializer,
ShoppingListAutoSyncSerializer, ShoppingListEntrySerializer,
ShoppingListRecipeSerializer, ShoppingListSerializer,
- StepSerializer, StorageSerializer,
+ SpaceSerializer, StepSerializer, StorageSerializer,
SupermarketCategoryRelationSerializer,
SupermarketCategorySerializer, SupermarketSerializer,
SyncLogSerializer, SyncSerializer, UnitSerializer,
UserFileSerializer, UserNameSerializer, UserPreferenceSerializer,
- ViewLogSerializer, IngredientSimpleSerializer, BookmarkletImportListSerializer, RecipeFromSourceSerializer, SpaceSerializer, UserSpaceSerializer, GroupSerializer, InviteLinkSerializer)
+ UserSpaceSerializer, ViewLogSerializer)
from recipes import settings
@@ -713,7 +720,7 @@ class RecipeViewSet(viewsets.ModelViewSet):
'Query string matched (fuzzy) against recipe name. In the future also fulltext search.')),
QueryParam(name='keywords', description=_(
'ID of keyword a recipe should have. For multiple repeat parameter. Equivalent to keywords_or'),
- qtype='int'),
+ qtype='int'),
QueryParam(name='keywords_or',
description=_('Keyword IDs, repeat for multiple. Return recipes with any of the keywords'),
qtype='int'),
@@ -1114,69 +1121,79 @@ def recipe_from_source(request):
- url: url to use for importing recipe
- data: if no url is given recipe is imported from provided source data
- (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
- :return: JsonResponse containing the parsed json, original html,json and images
+ :return: JsonResponse containing the parsed json and images
"""
+ scrape = None
serializer = RecipeFromSourceSerializer(data=request.data)
if serializer.is_valid():
- try:
- if bookmarklet := BookmarkletImport.objects.filter(pk=serializer.validated_data['bookmarklet']).first():
- serializer.validated_data['url'] = bookmarklet.url
- serializer.validated_data['data'] = bookmarklet.html
- bookmarklet.delete()
- except KeyError:
- pass
- # headers to use for request to external sites
- external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}
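+        # a bookmarklet import, when present, supplies both url and data and is
+        # consumed (deleted) once read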
+ if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
+ serializer.validated_data['url'] = bookmarklet.url
+ serializer.validated_data['data'] = bookmarklet.html
+ bookmarklet.delete()
- if not 'url' in serializer.validated_data and not 'data' in serializer.validated_data:
+ url = serializer.validated_data.get('url', None)
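+        # clients may send percent-encoded html or json; decode it before parsing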
+        data = unquote(serializer.validated_data.get('data', '') or '')
+ if not url and not data:
return Response({
'error': True,
'msg': _('Nothing to do.')
}, status=status.HTTP_400_BAD_REQUEST)
- # in manual mode request complete page to return it later
- if 'url' in serializer.validated_data:
- if re.match('^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', serializer.validated_data['url']):
- if validators.url(serializer.validated_data['url'], public=True):
+ elif url and not data:
+            if re.match(r'^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', url):
+ if validators.url(url, public=True):
return Response({
- 'recipe_json': get_from_youtube_scraper(serializer.validated_data['url'], request),
- 'recipe_tree': '',
- 'recipe_html': '',
+ 'recipe_json': get_from_youtube_scraper(url, request),
+ # 'recipe_tree': '',
+ # 'recipe_html': '',
'recipe_images': [],
}, status=status.HTTP_200_OK)
- try:
- if validators.url(serializer.validated_data['url'], public=True):
- serializer.validated_data['data'] = requests.get(serializer.validated_data['url'], headers=external_request_headers).content
- else:
+ else:
+ try:
+ if validators.url(url, public=True):
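+                        # wild_mode makes recipe_scrapers fall back to generic
+                        # schema.org parsing for sites without a dedicated scraper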
+ scrape = scrape_me(url_path=url, wild_mode=True)
+
+ else:
+ return Response({
+ 'error': True,
+ 'msg': _('Invalid Url')
+ }, status=status.HTTP_400_BAD_REQUEST)
+ except NoSchemaFoundInWildMode:
+ pass
+ except requests.exceptions.ConnectionError:
return Response({
'error': True,
- 'msg': _('Invalid Url')
+ 'msg': _('Connection Refused.')
}, status=status.HTTP_400_BAD_REQUEST)
- except requests.exceptions.ConnectionError:
- return Response({
- 'error': True,
- 'msg': _('Connection Refused.')
- }, status=status.HTTP_400_BAD_REQUEST)
- except requests.exceptions.MissingSchema:
- return Response({
- 'error': True,
- 'msg': _('Bad URL Schema.')
- }, status=status.HTTP_400_BAD_REQUEST)
+ except requests.exceptions.MissingSchema:
+ return Response({
+ 'error': True,
+ 'msg': _('Bad URL Schema.')
+ }, status=status.HTTP_400_BAD_REQUEST)
+        else:
+            try:
+                # a raw json payload is wrapped in an ld+json script tag so the
+                # text scraper can discover it like any other page; a bare object
+                # without schema hints is assumed to be a recipe
+                data_json = json.loads(data)
+                if isinstance(data_json, dict):
+                    data_json.setdefault('@context', 'https://schema.org')
+                    data_json.setdefault('@type', 'Recipe')
+                data = '<script type="application/ld+json">' + json.dumps(data_json) + '</script>'
+            except JSONDecodeError:
+                pass
+ scrape = text_scraper(text=data, url=url)
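+        # a canonical url found in the schema data still helps resolve relative links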
+        if not url and scrape.schema and (found_url := scrape.schema.data.get('url', None)):
+ scrape = text_scraper(text=data, url=found_url)
- recipe_json, recipe_tree, recipe_html, recipe_images = get_recipe_from_source(serializer.validated_data['data'], serializer.validated_data['url'], request)
- if len(recipe_tree) == 0 and len(recipe_json) == 0:
+ if scrape:
+ return Response({
+ 'recipe_json': helper.get_from_scraper(scrape, request),
+ # 'recipe_tree': recipe_tree,
+ # 'recipe_html': recipe_html,
+                'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, scrape.url))),
+ }, status=status.HTTP_200_OK)
+
+ else:
return Response({
'error': True,
'msg': _('No usable data could be found.')
}, status=status.HTTP_400_BAD_REQUEST)
- else:
- return Response({
- 'recipe_json': recipe_json,
- 'recipe_tree': recipe_tree,
- 'recipe_html': recipe_html,
- 'recipe_images': list(dict.fromkeys(recipe_images)),
- }, status=status.HTTP_200_OK)
else:
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
diff --git a/vue/src/apps/ImportView/ImportView.vue b/vue/src/apps/ImportView/ImportView.vue
index 407664929..4b2ed0ffd 100644
--- a/vue/src/apps/ImportView/ImportView.vue
+++ b/vue/src/apps/ImportView/ImportView.vue
@@ -461,8 +461,8 @@ export default {
recent_urls: [],
source_data: '',
recipe_json: undefined,
- recipe_html: undefined,
- recipe_tree: undefined,
+ // recipe_html: undefined,
+ // recipe_tree: undefined,
recipe_images: [],
imported_recipes: [],
failed_imports: [],
@@ -593,9 +593,9 @@ export default {
}
// reset all variables
- this.recipe_html = undefined
+ // this.recipe_html = undefined
this.recipe_json = undefined
- this.recipe_tree = undefined
+ // this.recipe_tree = undefined
this.recipe_images = []
// load recipe
@@ -621,8 +621,8 @@ export default {
return x
})
- this.recipe_tree = response.data['recipe_tree'];
- this.recipe_html = response.data['recipe_html'];
+ // this.recipe_tree = response.data['recipe_tree'];
+ // this.recipe_html = response.data['recipe_html'];
this.recipe_images = response.data['recipe_images'] !== undefined ? response.data['recipe_images'] : [];
if (!silent) {