Merge pull request #1917 from smilerz/bookmarklet_fix

Bookmarklet fix
Merged by vabene1111 on 2022-07-11 14:28:08 +02:00, committed via GitHub
8 changed files with 327 additions and 271 deletions

View File

@@ -1,189 +1,191 @@
-import json
-import re
-from json import JSONDecodeError
-from urllib.parse import unquote
-
-from bs4 import BeautifulSoup
-from bs4.element import Tag
-from recipe_scrapers import scrape_html, scrape_me
-from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
-from recipe_scrapers._utils import get_host_name, normalize_string
-
-from cookbook.helper import recipe_url_import as helper
-from cookbook.helper.scrapers.scrapers import text_scraper
-
-
-def get_recipe_from_source(text, url, request):
-    def build_node(k, v):
-        if isinstance(v, dict):
-            node = {
-                'name': k,
-                'value': k,
-                'children': get_children_dict(v)
-            }
-        elif isinstance(v, list):
-            node = {
-                'name': k,
-                'value': k,
-                'children': get_children_list(v)
-            }
-        else:
-            node = {
-                'name': k + ": " + normalize_string(str(v)),
-                'value': normalize_string(str(v))
-            }
-        return node
-
-    def get_children_dict(children):
-        kid_list = []
-        for k, v in children.items():
-            kid_list.append(build_node(k, v))
-        return kid_list
-
-    def get_children_list(children):
-        kid_list = []
-        for kid in children:
-            if type(kid) == list:
-                node = {
-                    'name': "unknown list",
-                    'value': "unknown list",
-                    'children': get_children_list(kid)
-                }
-                kid_list.append(node)
-            elif type(kid) == dict:
-                for k, v in kid.items():
-                    kid_list.append(build_node(k, v))
-            else:
-                kid_list.append({
-                    'name': normalize_string(str(kid)),
-                    'value': normalize_string(str(kid))
-                })
-        return kid_list
-
-    recipe_tree = []
-    parse_list = []
-    soup = BeautifulSoup(text, "html.parser")
-    html_data = get_from_html(soup)
-    images = get_images_from_source(soup, url)
-    text = unquote(text)
-    scrape = None
-    if url:
-        try:
-            scrape = scrape_me(url_path=url, wild_mode=True)
-        except(NoSchemaFoundInWildMode):
-            pass
-    if not scrape:
-        try:
-            parse_list.append(remove_graph(json.loads(text)))
-            if not url and 'url' in parse_list[0]:
-                url = parse_list[0]['url']
-            scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
-        except JSONDecodeError:
-            for el in soup.find_all('script', type='application/ld+json'):
-                el = remove_graph(el)
-                if not url and 'url' in el:
-                    url = el['url']
-                if type(el) == list:
-                    for le in el:
-                        parse_list.append(le)
-                elif type(el) == dict:
-                    parse_list.append(el)
-            for el in soup.find_all(type='application/json'):
-                el = remove_graph(el)
-                if type(el) == list:
-                    for le in el:
-                        parse_list.append(le)
-                elif type(el) == dict:
-                    parse_list.append(el)
-            scrape = text_scraper(text, url=url)
-
-    recipe_json = helper.get_from_scraper(scrape, request)
-
-    for el in parse_list:
-        temp_tree = []
-        if isinstance(el, Tag):
-            try:
-                el = json.loads(el.string)
-            except TypeError:
-                continue
-
-        for k, v in el.items():
-            if isinstance(v, dict):
-                node = {
-                    'name': k,
-                    'value': k,
-                    'children': get_children_dict(v)
-                }
-            elif isinstance(v, list):
-                node = {
-                    'name': k,
-                    'value': k,
-                    'children': get_children_list(v)
-                }
-            else:
-                node = {
-                    'name': k + ": " + normalize_string(str(v)),
-                    'value': normalize_string(str(v))
-                }
-            temp_tree.append(node)
-
-        if '@type' in el and el['@type'] == 'Recipe':
-            recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
-        else:
-            recipe_tree += [{'name': 'json', 'children': temp_tree}]
-
-    return recipe_json, recipe_tree, html_data, images
-
-
-def get_from_html(soup):
-    INVISIBLE_ELEMS = ('style', 'script', 'head', 'title')
-    html = []
-    for s in soup.strings:
-        if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)):
-            html.append(s)
-    return html
-
-
-def get_images_from_source(soup, url):
-    sources = ['src', 'srcset', 'data-src']
-    images = []
-    img_tags = soup.find_all('img')
-    if url:
-        site = get_host_name(url)
-        prot = url.split(':')[0]
-    urls = []
-    for img in img_tags:
-        for src in sources:
-            try:
-                urls.append(img[src])
-            except KeyError:
-                pass
-    for u in urls:
-        u = u.split('?')[0]
-        filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
-        if filename:
-            if (('http' not in u) and (url)):
-                # sometimes an image source can be relative
-                # if it is provide the base url
-                u = '{}://{}{}'.format(prot, site, u)
-            if 'http' in u:
-                images.append(u)
-    return images
-
-
-def remove_graph(el):
-    # recipes type might be wrapped in @graph type
-    if isinstance(el, Tag):
-        try:
-            el = json.loads(el.string)
-            if '@graph' in el:
-                for x in el['@graph']:
-                    if '@type' in x and x['@type'] == 'Recipe':
-                        el = x
-        except (TypeError, JSONDecodeError):
-            pass
-    return el
+# import json
+# import re
+# from json import JSONDecodeError
+# from urllib.parse import unquote
+
+# from bs4 import BeautifulSoup
+# from bs4.element import Tag
+# from recipe_scrapers import scrape_html, scrape_me
+# from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
+# from recipe_scrapers._utils import get_host_name, normalize_string
+
+# from cookbook.helper import recipe_url_import as helper
+# from cookbook.helper.scrapers.scrapers import text_scraper
+
+
+# def get_recipe_from_source(text, url, request):
+#     def build_node(k, v):
+#         if isinstance(v, dict):
+#             node = {
+#                 'name': k,
+#                 'value': k,
+#                 'children': get_children_dict(v)
+#             }
+#         elif isinstance(v, list):
+#             node = {
+#                 'name': k,
+#                 'value': k,
+#                 'children': get_children_list(v)
+#             }
+#         else:
+#             node = {
+#                 'name': k + ": " + normalize_string(str(v)),
+#                 'value': normalize_string(str(v))
+#             }
+#         return node
+
+#     def get_children_dict(children):
+#         kid_list = []
+#         for k, v in children.items():
+#             kid_list.append(build_node(k, v))
+#         return kid_list
+
+#     def get_children_list(children):
+#         kid_list = []
+#         for kid in children:
+#             if type(kid) == list:
+#                 node = {
+#                     'name': "unknown list",
+#                     'value': "unknown list",
+#                     'children': get_children_list(kid)
+#                 }
+#                 kid_list.append(node)
+#             elif type(kid) == dict:
+#                 for k, v in kid.items():
+#                     kid_list.append(build_node(k, v))
+#             else:
+#                 kid_list.append({
+#                     'name': normalize_string(str(kid)),
+#                     'value': normalize_string(str(kid))
+#                 })
+#         return kid_list
+
+#     recipe_tree = []
+#     parse_list = []
+#     soup = BeautifulSoup(text, "html.parser")
+#     html_data = get_from_html(soup)
+#     images = get_images_from_source(soup, url)
+#     text = unquote(text)
+#     scrape = None
+#     if url and not text:
+#         try:
+#             scrape = scrape_me(url_path=url, wild_mode=True)
+#         except(NoSchemaFoundInWildMode):
+#             pass
+#     if not scrape:
+#         try:
+#             parse_list.append(remove_graph(json.loads(text)))
+#             if not url and 'url' in parse_list[0]:
+#                 url = parse_list[0]['url']
+#             scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
+#         except JSONDecodeError:
+#             for el in soup.find_all('script', type='application/ld+json'):
+#                 el = remove_graph(el)
+#                 if not url and 'url' in el:
+#                     url = el['url']
+#                 if type(el) == list:
+#                     for le in el:
+#                         parse_list.append(le)
+#                 elif type(el) == dict:
+#                     parse_list.append(el)
+#             for el in soup.find_all(type='application/json'):
+#                 el = remove_graph(el)
+#                 if type(el) == list:
+#                     for le in el:
+#                         parse_list.append(le)
+#                 elif type(el) == dict:
+#                     parse_list.append(el)
+#             scrape = text_scraper(text, url=url)
+
+#     recipe_json = helper.get_from_scraper(scrape, request)
+
+# # TODO: DEPRECATE recipe_tree & html_data. first validate it isn't used anywhere
+#     for el in parse_list:
+#         temp_tree = []
+#         if isinstance(el, Tag):
+#             try:
+#                 el = json.loads(el.string)
+#             except TypeError:
+#                 continue
+
+#         for k, v in el.items():
+#             if isinstance(v, dict):
+#                 node = {
+#                     'name': k,
+#                     'value': k,
+#                     'children': get_children_dict(v)
+#                 }
+#             elif isinstance(v, list):
+#                 node = {
+#                     'name': k,
+#                     'value': k,
+#                     'children': get_children_list(v)
+#                 }
+#             else:
+#                 node = {
+#                     'name': k + ": " + normalize_string(str(v)),
+#                     'value': normalize_string(str(v))
+#                 }
+#             temp_tree.append(node)
+
+#         if '@type' in el and el['@type'] == 'Recipe':
+#             recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
+#         else:
+#             recipe_tree += [{'name': 'json', 'children': temp_tree}]
+
+#     return recipe_json, recipe_tree, html_data, images
+
+
+# def get_from_html(soup):
+#     INVISIBLE_ELEMS = ('style', 'script', 'head', 'title')
+#     html = []
+#     for s in soup.strings:
+#         if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)):
+#             html.append(s)
+#     return html
+
+
+# def get_images_from_source(soup, url):
+#     sources = ['src', 'srcset', 'data-src']
+#     images = []
+#     img_tags = soup.find_all('img')
+#     if url:
+#         site = get_host_name(url)
+#         prot = url.split(':')[0]
+#     urls = []
+#     for img in img_tags:
+#         for src in sources:
+#             try:
+#                 urls.append(img[src])
+#             except KeyError:
+#                 pass
+#     for u in urls:
+#         u = u.split('?')[0]
+#         filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
+#         if filename:
+#             if (('http' not in u) and (url)):
+#                 # sometimes an image source can be relative
+#                 # if it is provide the base url
+#                 u = '{}://{}{}'.format(prot, site, u)
+#             if 'http' in u:
+#                 images.append(u)
+#     return images
+
+
+# def remove_graph(el):
+#     # recipes type might be wrapped in @graph type
+#     if isinstance(el, Tag):
+#         try:
+#             el = json.loads(el.string)
+#             if '@graph' in el:
+#                 for x in el['@graph']:
+#                     if '@type' in x and x['@type'] == 'Recipe':
+#                         el = x
+#         except (TypeError, JSONDecodeError):
+#             pass
+#     return el
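
The remove_graph() logic retired above (along with the rest of this module) unwraps schema.org @graph wrappers so only the Recipe node survives. A standalone sketch of that unwrap step; the JSON-LD payload is invented for illustration:

    import json

    # Many sites nest the Recipe object inside a top-level @graph array.
    payload = json.loads("""
    {
        "@context": "https://schema.org",
        "@graph": [
            {"@type": "WebSite", "name": "Example Site"},
            {"@type": "Recipe", "name": "Pancakes", "recipeIngredient": ["2 eggs", "1 cup flour"]}
        ]
    }
    """)

    el = payload
    if '@graph' in el:
        for x in el['@graph']:
            if '@type' in x and x['@type'] == 'Recipe':
                el = x  # keep only the Recipe node, as remove_graph() did

    print(el['name'])  # -> Pancakes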

View File

@@ -1,21 +1,19 @@
 import random
 import re
 from html import unescape
-from pytube import YouTube
 from unicodedata import decomposition
 from django.utils.dateparse import parse_duration
 from django.utils.translation import gettext as _
 from isodate import parse_duration as iso_parse_duration
 from isodate.isoerror import ISO8601Error
-from recipe_scrapers._utils import get_minutes
+from pytube import YouTube
+from recipe_scrapers._utils import get_host_name, get_minutes
 from cookbook.helper import recipe_url_import as helper
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.models import Keyword
 # from recipe_scrapers._utils import get_minutes ## temporary until/unless upstream incorporates get_minutes() PR
@@ -369,3 +367,32 @@ def iso_duration_to_minutes(string):
         string
     ).groupdict()
     return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)
+
+
+def get_images_from_soup(soup, url):
+    sources = ['src', 'srcset', 'data-src']
+    images = []
+    img_tags = soup.find_all('img')
+    if url:
+        site = get_host_name(url)
+        prot = url.split(':')[0]
+    urls = []
+    for img in img_tags:
+        for src in sources:
+            try:
+                urls.append(img[src])
+            except KeyError:
+                pass
+    for u in urls:
+        u = u.split('?')[0]
+        filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
+        if filename:
+            if (('http' not in u) and (url)):
+                # sometimes an image source can be relative
+                # if it is provide the base url
+                u = '{}://{}{}'.format(prot, site, u)
+            if 'http' in u:
+                images.append(u)
+    return images
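
The new get_images_from_soup() is a direct move of get_images_from_source() out of the now-retired recipe_html_import module, so callers no longer import the deprecated code. A usage sketch with invented inputs; it assumes a Django context where the cookbook package imports cleanly (e.g. a manage.py shell):

    from bs4 import BeautifulSoup

    from cookbook.helper.recipe_url_import import get_images_from_soup

    # One relative source and one absolute source with a query string.
    html = (
        '<img src="/media/pancakes.jpg">'
        '<img data-src="https://cdn.example.com/stack.png?w=800">'
    )
    soup = BeautifulSoup(html, "html.parser")

    # The url argument supplies the scheme and host used to absolutize relative
    # sources; query strings are stripped before the file-extension check.
    print(get_images_from_soup(soup, "https://example.com/recipe/1"))
    # -> ['https://example.com/media/pancakes.jpg', 'https://cdn.example.com/stack.png']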

View File

@@ -1,6 +1,7 @@
-from bs4 import BeautifulSoup
 from json import JSONDecodeError
-from recipe_scrapers import SCRAPERS
+from bs4 import BeautifulSoup
+from recipe_scrapers import SCRAPERS, get_host_name
 from recipe_scrapers._factory import SchemaScraperFactory
 from recipe_scrapers._schemaorg import SchemaOrg
@@ -15,22 +16,28 @@ SCRAPERS.update(CUSTOM_SCRAPERS)
 def text_scraper(text, url=None):
-    scraper_class = SchemaScraperFactory.SchemaScraper
+    domain = None
+    if url:
+        domain = get_host_name(url)
+    if domain in SCRAPERS:
+        scraper_class = SCRAPERS[domain]
+    else:
+        scraper_class = SchemaScraperFactory.SchemaScraper
     class TextScraper(scraper_class):
         def __init__(
             self,
-            page_data,
-            url=None
+            html=None,
+            url=None,
         ):
             self.wild_mode = False
             self.meta_http_equiv = False
-            self.soup = BeautifulSoup(page_data, "html.parser")
+            self.soup = BeautifulSoup(html, "html.parser")
             self.url = url
             self.recipe = None
             try:
-                self.schema = SchemaOrg(page_data)
+                self.schema = SchemaOrg(html)
             except (JSONDecodeError, AttributeError):
                 pass
-    return TextScraper(text, url)
+    return TextScraper(url=url, html=text)
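
text_scraper() now resolves a site-specific scraper class from the URL's host name when one is registered in SCRAPERS, falling back to the generic schema.org scraper otherwise, and html is passed as a keyword to match the upstream recipe_scrapers constructor. A minimal sketch with an invented ld+json payload, run inside the project (e.g. a manage.py shell):

    from cookbook.helper.scrapers.scrapers import text_scraper

    html = (
        "<script type='application/ld+json'>"
        '{"@context": "https://schema.org", "@type": "Recipe", "name": "Pancakes"}'
        "</script>"
    )

    # For an unregistered host this instantiates the generic SchemaScraper subclass.
    scrape = text_scraper(html, url="https://example.com/recipe/1")
    print(scrape.schema.data.get('name'))  # -> Pancakes, parsed by SchemaOrg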

View File

@@ -10,8 +10,8 @@ import validators
 import yaml
 from cookbook.helper.ingredient_parser import IngredientParser
-from cookbook.helper.recipe_html_import import get_recipe_from_source
-from cookbook.helper.recipe_url_import import iso_duration_to_minutes
+from cookbook.helper.recipe_url_import import get_images_from_soup, iso_duration_to_minutes
+from cookbook.helper.scrapers.scrapers import text_scraper
 from cookbook.integration.integration import Integration
 from cookbook.models import Ingredient, Keyword, Recipe, Step
@@ -24,7 +24,10 @@ class CookBookApp(Integration):
     def get_recipe_from_file(self, file):
         recipe_html = file.getvalue().decode("utf-8")
-        recipe_json, recipe_tree, html_data, images = get_recipe_from_source(recipe_html, 'CookBookApp', self.request)
+        # recipe_json, recipe_tree, html_data, images = get_recipe_from_source(recipe_html, 'CookBookApp', self.request)
+        scrape = text_scraper(text=data)
+        recipe_json = helper.get_from_scraper(scrape, request)
+        images = list(dict.fromkeys(get_images_from_soup(scrape.soup, url)))
         recipe = Recipe.objects.create(
             name=recipe_json['name'].strip(),

View File

@@ -3,10 +3,9 @@ from io import BytesIO
 from zipfile import ZipFile
 from bs4 import BeautifulSoup
 from django.utils.translation import gettext as _
 from cookbook.helper.ingredient_parser import IngredientParser
-from cookbook.helper.recipe_html_import import get_recipe_from_source
 from cookbook.helper.recipe_url_import import iso_duration_to_minutes, parse_servings
 from cookbook.integration.integration import Integration
 from cookbook.models import Ingredient, Keyword, Recipe, Step

View File

@@ -1,12 +1,11 @@
 import traceback
-from datetime import timedelta, datetime
+from datetime import datetime, timedelta
 from decimal import Decimal
 from gettext import gettext as _
 from html import escape
 from smtplib import SMTPException
-from PIL import Image
-from django.contrib.auth.models import User, Group
+from django.contrib.auth.models import Group, User
 from django.core.mail import send_mail
 from django.db.models import Avg, Q, QuerySet, Sum
 from django.http import BadHeaderError
@@ -14,6 +13,7 @@ from django.urls import reverse
 from django.utils import timezone
 from django_scopes import scopes_disabled
 from drf_writable_nested import UniqueFieldsMixin, WritableNestedModelSerializer
+from PIL import Image
 from rest_framework import serializers
 from rest_framework.exceptions import NotFound, ValidationError
@@ -22,14 +22,14 @@ from cookbook.helper.HelperFunctions import str2bool
 from cookbook.helper.permission_helper import above_space_limit
 from cookbook.helper.shopping_helper import RecipeShoppingEditor
 from cookbook.models import (Automation, BookmarkletImport, Comment, CookLog, CustomFilter,
-                             ExportLog, Food, FoodInheritField, ImportLog, Ingredient, Keyword,
-                             MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
+                             ExportLog, Food, FoodInheritField, ImportLog, Ingredient, InviteLink,
+                             Keyword, MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
                              RecipeBookEntry, RecipeImport, ShareLink, ShoppingList,
-                             ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
-                             SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
-                             UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
+                             ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
+                             Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
+                             SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
 from cookbook.templatetags.custom_tags import markdown
-from recipes.settings import MEDIA_URL, AWS_ENABLED
+from recipes.settings import AWS_ENABLED, MEDIA_URL
 class ExtendedRecipeMixin(serializers.ModelSerializer):
@@ -193,7 +193,8 @@ class SpaceSerializer(WritableNestedModelSerializer):
     class Meta:
         model = Space
-        fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
+        fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users',
+                  'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
         read_only_fields = ('id', 'created_by', 'created_at', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo',)
@@ -815,7 +816,7 @@ class RecipeBookEntrySerializer(serializers.ModelSerializer):
         book = validated_data['book']
         recipe = validated_data['recipe']
         if not book.get_owner() == self.context['request'].user and not self.context[
-            'request'].user in book.get_shared():
+                'request'].user in book.get_shared():
             raise NotFound(detail=None, code=None)
         obj, created = RecipeBookEntry.objects.get_or_create(book=book, recipe=recipe)
         return obj
@@ -871,11 +872,11 @@ class ShoppingListRecipeSerializer(serializers.ModelSerializer):
         value = value.quantize(
             Decimal(1)) if value == value.to_integral() else value.normalize()  # strips trailing zero
         return (
-            obj.name
-            or getattr(obj.mealplan, 'title', None)
-            or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
-            or obj.recipe.name
+                obj.name
+                or getattr(obj.mealplan, 'title', None)
+                or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
+                or obj.recipe.name
         ) + f' ({value:.2g})'
     def update(self, instance, validated_data):
         # TODO remove once old shopping list
@@ -1232,6 +1233,6 @@ class FoodShoppingUpdateSerializer(serializers.ModelSerializer):
 # non model serializers
 class RecipeFromSourceSerializer(serializers.Serializer):
-    url = serializers.CharField(max_length=4096, required=False, allow_null=True)
+    url = serializers.CharField(max_length=4096, required=False, allow_null=True, allow_blank=True)
     data = serializers.CharField(required=False, allow_null=True, allow_blank=True)
     bookmarklet = serializers.IntegerField(required=False, allow_null=True, )
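
The url field gaining allow_blank=True matters for the bookmarklet flow, which (presumably, given the rest of this fix) can post an empty url string alongside the captured HTML; previously such a payload failed validation. A sketch of the difference, with an invented payload and a Django context assumed:

    from cookbook.serializer import RecipeFromSourceSerializer

    s = RecipeFromSourceSerializer(data={'url': '', 'data': '<html>...</html>'})
    # With allow_blank=True this now validates; the view then decides between
    # the url and data branches instead of rejecting the request outright.
    print(s.is_valid())  # -> True (previously False: '' failed the blank check)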

View File

@@ -5,20 +5,20 @@ import re
 import traceback
 import uuid
 from collections import OrderedDict
+from json import JSONDecodeError
+from urllib.parse import unquote
 from zipfile import ZipFile
 import requests
 import validators
-from PIL import UnidentifiedImageError
 from annoying.decorators import ajax_request
 from annoying.functions import get_object_or_None
 from django.contrib import messages
-from django.contrib.auth.models import User, Group
+from django.contrib.auth.models import Group, User
 from django.contrib.postgres.search import TrigramSimilarity
 from django.core.exceptions import FieldError, ValidationError
 from django.core.files import File
-from django.db.models import (Case, Count, Exists, OuterRef, ProtectedError, Q,
-                              Subquery, Value, When)
+from django.db.models import Case, Count, Exists, OuterRef, ProtectedError, Q, Subquery, Value, When
 from django.db.models.fields.related import ForeignObjectRel
 from django.db.models.functions import Coalesce, Lower
 from django.http import FileResponse, HttpResponse, JsonResponse
@@ -27,6 +27,9 @@ from django.urls import reverse
 from django.utils.translation import gettext as _
 from django_scopes import scopes_disabled
 from icalendar import Calendar, Event
+from PIL import UnidentifiedImageError
+from recipe_scrapers import scrape_html, scrape_me
+from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
 from requests.exceptions import MissingSchema
 from rest_framework import decorators, status, viewsets
 from rest_framework.authtoken.models import Token
@@ -41,43 +44,47 @@ from rest_framework.throttling import AnonRateThrottle
 from rest_framework.viewsets import ViewSetMixin
 from treebeard.exceptions import InvalidMoveToDescendant, InvalidPosition, PathOverflow
+from cookbook.helper import recipe_url_import as helper
 from cookbook.helper.HelperFunctions import str2bool
 from cookbook.helper.image_processing import handle_image
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.helper.permission_helper import (CustomIsAdmin, CustomIsGuest, CustomIsOwner,
-                                               CustomIsShare, CustomIsShared, CustomIsUser,
-                                               group_required, CustomIsSpaceOwner, switch_user_active_space, is_space_owner, CustomIsOwnerReadOnly)
-from cookbook.helper.recipe_html_import import get_recipe_from_source
+                                               CustomIsOwnerReadOnly, CustomIsShare, CustomIsShared,
+                                               CustomIsSpaceOwner, CustomIsUser, group_required,
+                                               is_space_owner, switch_user_active_space)
 from cookbook.helper.recipe_search import RecipeFacet, RecipeSearch, old_search
-from cookbook.helper.recipe_url_import import get_from_youtube_scraper
+from cookbook.helper.recipe_url_import import get_from_youtube_scraper, get_images_from_soup
+from cookbook.helper.scrapers.scrapers import text_scraper
 from cookbook.helper.shopping_helper import RecipeShoppingEditor, shopping_helper
 from cookbook.models import (Automation, BookmarkletImport, CookLog, CustomFilter, ExportLog, Food,
-                             FoodInheritField, ImportLog, Ingredient, Keyword, MealPlan, MealType,
-                             Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
-                             ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
-                             SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
-                             UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
+                             FoodInheritField, ImportLog, Ingredient, InviteLink, Keyword, MealPlan,
+                             MealType, Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
+                             ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
+                             Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
+                             SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
 from cookbook.provider.dropbox import Dropbox
 from cookbook.provider.local import Local
 from cookbook.provider.nextcloud import Nextcloud
 from cookbook.schemas import FilterSchema, QueryParam, QueryParamAutoSchema, TreeSchema
-from cookbook.serializer import (AutomationSerializer, BookmarkletImportSerializer,
-                                 CookLogSerializer, CustomFilterSerializer, ExportLogSerializer,
+from cookbook.serializer import (AutomationSerializer, BookmarkletImportListSerializer,
+                                 BookmarkletImportSerializer, CookLogSerializer,
+                                 CustomFilterSerializer, ExportLogSerializer,
                                  FoodInheritFieldSerializer, FoodSerializer,
-                                 FoodShoppingUpdateSerializer, ImportLogSerializer,
-                                 IngredientSerializer, KeywordSerializer, MealPlanSerializer,
+                                 FoodShoppingUpdateSerializer, GroupSerializer, ImportLogSerializer,
+                                 IngredientSerializer, IngredientSimpleSerializer,
+                                 InviteLinkSerializer, KeywordSerializer, MealPlanSerializer,
                                  MealTypeSerializer, RecipeBookEntrySerializer,
-                                 RecipeBookSerializer, RecipeImageSerializer,
-                                 RecipeOverviewSerializer, RecipeSerializer,
+                                 RecipeBookSerializer, RecipeFromSourceSerializer,
+                                 RecipeImageSerializer, RecipeOverviewSerializer, RecipeSerializer,
                                  RecipeShoppingUpdateSerializer, RecipeSimpleSerializer,
                                  ShoppingListAutoSyncSerializer, ShoppingListEntrySerializer,
                                  ShoppingListRecipeSerializer, ShoppingListSerializer,
-                                 StepSerializer, StorageSerializer, SpaceSerializer,
+                                 SpaceSerializer, StepSerializer, StorageSerializer,
                                  SupermarketCategoryRelationSerializer,
                                  SupermarketCategorySerializer, SupermarketSerializer,
                                  SyncLogSerializer, SyncSerializer, UnitSerializer,
                                  UserFileSerializer, UserNameSerializer, UserPreferenceSerializer,
-                                 ViewLogSerializer, IngredientSimpleSerializer, BookmarkletImportListSerializer, RecipeFromSourceSerializer, SpaceSerializer, UserSpaceSerializer, GroupSerializer, InviteLinkSerializer)
+                                 UserSpaceSerializer, ViewLogSerializer)
 from recipes import settings
@@ -713,7 +720,7 @@ class RecipeViewSet(viewsets.ModelViewSet):
                            'Query string matched (fuzzy) against recipe name. In the future also fulltext search.')),
               QueryParam(name='keywords', description=_(
                   'ID of keyword a recipe should have. For multiple repeat parameter. Equivalent to keywords_or'),
-                  qtype='int'),
+                         qtype='int'),
               QueryParam(name='keywords_or',
                          description=_('Keyword IDs, repeat for multiple. Return recipes with any of the keywords'),
                          qtype='int'),
@@ -1114,69 +1121,79 @@ def recipe_from_source(request):
     - url: url to use for importing recipe
     - data: if no url is given recipe is imported from provided source data
     - (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
-    :return: JsonResponse containing the parsed json, original html,json and images
+    :return: JsonResponse containing the parsed json and images
     """
+    scrape = None
     serializer = RecipeFromSourceSerializer(data=request.data)
     if serializer.is_valid():
-        try:
-            if bookmarklet := BookmarkletImport.objects.filter(pk=serializer.validated_data['bookmarklet']).first():
-                serializer.validated_data['url'] = bookmarklet.url
-                serializer.validated_data['data'] = bookmarklet.html
-                bookmarklet.delete()
-        except KeyError:
-            pass
-
-        # headers to use for request to external sites
-        external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}
-
-        if not 'url' in serializer.validated_data and not 'data' in serializer.validated_data:
+        if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
+            serializer.validated_data['url'] = bookmarklet.url
+            serializer.validated_data['data'] = bookmarklet.html
+            bookmarklet.delete()
+
+        url = serializer.validated_data.get('url', None)
+        data = unquote(serializer.validated_data.get('data', None))
+        if not url and not data:
             return Response({
                 'error': True,
                 'msg': _('Nothing to do.')
             }, status=status.HTTP_400_BAD_REQUEST)
-
-        # in manual mode request complete page to return it later
-        if 'url' in serializer.validated_data:
-            if re.match('^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', serializer.validated_data['url']):
-                if validators.url(serializer.validated_data['url'], public=True):
+        elif url and not data:
+            if re.match('^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', url):
+                if validators.url(url, public=True):
                     return Response({
-                        'recipe_json': get_from_youtube_scraper(serializer.validated_data['url'], request),
-                        'recipe_tree': '',
-                        'recipe_html': '',
+                        'recipe_json': get_from_youtube_scraper(url, request),
+                        # 'recipe_tree': '',
+                        # 'recipe_html': '',
                         'recipe_images': [],
                     }, status=status.HTTP_200_OK)
-            try:
-                if validators.url(serializer.validated_data['url'], public=True):
-                    serializer.validated_data['data'] = requests.get(serializer.validated_data['url'], headers=external_request_headers).content
-                else:
-                    return Response({
-                        'error': True,
-                        'msg': _('Invalid Url')
-                    }, status=status.HTTP_400_BAD_REQUEST)
-            except requests.exceptions.ConnectionError:
-                return Response({
-                    'error': True,
-                    'msg': _('Connection Refused.')
-                }, status=status.HTTP_400_BAD_REQUEST)
-            except requests.exceptions.MissingSchema:
-                return Response({
-                    'error': True,
-                    'msg': _('Bad URL Schema.')
-                }, status=status.HTTP_400_BAD_REQUEST)
-
-        recipe_json, recipe_tree, recipe_html, recipe_images = get_recipe_from_source(serializer.validated_data['data'], serializer.validated_data['url'], request)
-        if len(recipe_tree) == 0 and len(recipe_json) == 0:
-            return Response({
-                'error': True,
-                'msg': _('No usable data could be found.')
-            }, status=status.HTTP_400_BAD_REQUEST)
-        else:
-            return Response({
-                'recipe_json': recipe_json,
-                'recipe_tree': recipe_tree,
-                'recipe_html': recipe_html,
-                'recipe_images': list(dict.fromkeys(recipe_images)),
-            }, status=status.HTTP_200_OK)
+            else:
+                try:
+                    if validators.url(url, public=True):
+                        scrape = scrape_me(url_path=url, wild_mode=True)
+                    else:
+                        return Response({
+                            'error': True,
+                            'msg': _('Invalid Url')
+                        }, status=status.HTTP_400_BAD_REQUEST)
+                except NoSchemaFoundInWildMode:
+                    pass
+                except requests.exceptions.ConnectionError:
+                    return Response({
+                        'error': True,
+                        'msg': _('Connection Refused.')
+                    }, status=status.HTTP_400_BAD_REQUEST)
+                except requests.exceptions.MissingSchema:
+                    return Response({
+                        'error': True,
+                        'msg': _('Bad URL Schema.')
+                    }, status=status.HTTP_400_BAD_REQUEST)
+        else:
+            try:
+                json.loads(data)
+                data = "<script type='application/ld+json'>" + data + "</script>"
+            except JSONDecodeError:
+                pass
+            scrape = text_scraper(text=data, url=url)
+            if not url and (found_url := scrape.schema.data.get('url', None)):
+                scrape = text_scraper(text=data, url=found_url)
+
+        if scrape:
+            return Response({
+                'recipe_json': helper.get_from_scraper(scrape, request),
+                # 'recipe_tree': recipe_tree,
+                # 'recipe_html': recipe_html,
+                'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, url))),
+            }, status=status.HTTP_200_OK)
+        else:
+            return Response({
+                'error': True,
+                'msg': _('No usable data could be found.')
+            }, status=status.HTTP_400_BAD_REQUEST)
     else:
         return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
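
End to end, recipe_from_source now returns only recipe_json and recipe_images; recipe_tree and recipe_html are gone from the payload. A client-side sketch follows; the endpoint path and token are assumptions for illustration, not taken from this diff:

    import requests

    API = 'https://tandoor.example.com/api/recipe-from-source/'  # hypothetical path
    HEADERS = {'Authorization': 'Bearer <token>'}  # hypothetical auth

    # url only: the server scrapes the page itself via scrape_me(..., wild_mode=True).
    r = requests.post(API, json={'url': 'https://example.com/recipe/1'}, headers=HEADERS)

    # data only (the bookmarklet path): raw HTML or a bare ld+json string is accepted;
    # bare JSON is wrapped in a <script type='application/ld+json'> tag before scraping.
    r = requests.post(API, json={'data': '{"@type": "Recipe", "name": "Pancakes"}'}, headers=HEADERS)
    print(r.json().get('recipe_json', {}).get('name'))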

View File

@@ -461,8 +461,8 @@ export default {
             recent_urls: [],
             source_data: '',
             recipe_json: undefined,
-            recipe_html: undefined,
-            recipe_tree: undefined,
+            // recipe_html: undefined,
+            // recipe_tree: undefined,
             recipe_images: [],
             imported_recipes: [],
             failed_imports: [],
@@ -593,9 +593,9 @@ export default {
             }
             // reset all variables
-            this.recipe_html = undefined
+            // this.recipe_html = undefined
             this.recipe_json = undefined
-            this.recipe_tree = undefined
+            // this.recipe_tree = undefined
             this.recipe_images = []
             // load recipe
@@ -621,8 +621,8 @@ export default {
                 return x
             })
-            this.recipe_tree = response.data['recipe_tree'];
-            this.recipe_html = response.data['recipe_html'];
+            // this.recipe_tree = response.data['recipe_tree'];
+            // this.recipe_html = response.data['recipe_html'];
             this.recipe_images = response.data['recipe_images'] !== undefined ? response.data['recipe_images'] : [];
             if (!silent) {