From e23d514d894f299d003767bbdb829d2c6c495e0d Mon Sep 17 00:00:00 2001
From: smilerz
Date: Wed, 6 Jul 2022 16:16:53 -0500
Subject: [PATCH 1/4] fix bookmarklet

---
 cookbook/helper/recipe_html_import.py | 13 +++---
 cookbook/helper/scrapers/scrapers.py  | 13 ++++--
 cookbook/serializer.py                | 35 ++++++++--------
 cookbook/views/api.py                 | 59 +++++++++++++--------------
 4 files changed, 65 insertions(+), 55 deletions(-)

diff --git a/cookbook/helper/recipe_html_import.py b/cookbook/helper/recipe_html_import.py
index 1b5d37ad2..48dc4c119 100644
--- a/cookbook/helper/recipe_html_import.py
+++ b/cookbook/helper/recipe_html_import.py
@@ -6,7 +6,7 @@ from urllib.parse import unquote
 from bs4 import BeautifulSoup
 from bs4.element import Tag
 from recipe_scrapers import scrape_html, scrape_me
-from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
+from recipe_scrapers._exceptions import NoSchemaFoundInWildMode, WebsiteNotImplementedError
 from recipe_scrapers._utils import get_host_name, normalize_string
 
 from cookbook.helper import recipe_url_import as helper
@@ -68,11 +68,14 @@ def get_recipe_from_source(text, url, request):
     text = unquote(text)
     scrape = None
 
-    if url:
+    if url and not text:
         try:
-            scrape = scrape_me(url_path=url, wild_mode=True)
-        except(NoSchemaFoundInWildMode):
-            pass
+            scrape = scrape_me(url_path=url)
+        except WebsiteNotImplementedError:
+            try:
+                scrape = scrape_me(url_path=url, wild_mode=True)
+            except(NoSchemaFoundInWildMode):
+                pass
     if not scrape:
         try:
             parse_list.append(remove_graph(json.loads(text)))
diff --git a/cookbook/helper/scrapers/scrapers.py b/cookbook/helper/scrapers/scrapers.py
index eb93cc2c2..94e3daea0 100644
--- a/cookbook/helper/scrapers/scrapers.py
+++ b/cookbook/helper/scrapers/scrapers.py
@@ -1,6 +1,7 @@
-from bs4 import BeautifulSoup
 from json import JSONDecodeError
-from recipe_scrapers import SCRAPERS
+
+from bs4 import BeautifulSoup
+from recipe_scrapers import SCRAPERS, get_host_name
 from recipe_scrapers._factory import SchemaScraperFactory
 from recipe_scrapers._schemaorg import SchemaOrg
 
@@ -15,7 +16,13 @@ SCRAPERS.update(CUSTOM_SCRAPERS)
 
 
 def text_scraper(text, url=None):
-    scraper_class = SchemaScraperFactory.SchemaScraper
+    domain = None
+    if url:
+        domain = get_host_name(url)
+    if domain in SCRAPERS:
+        scraper_class = SCRAPERS[domain]
+    else:
+        scraper_class = SchemaScraperFactory.SchemaScraper
 
     class TextScraper(scraper_class):
         def __init__(
diff --git a/cookbook/serializer.py b/cookbook/serializer.py
index 1e386c5dd..a4b8b2fe9 100644
--- a/cookbook/serializer.py
+++ b/cookbook/serializer.py
@@ -1,12 +1,11 @@
 import traceback
-from datetime import timedelta, datetime
+from datetime import datetime, timedelta
 from decimal import Decimal
 from gettext import gettext as _
 from html import escape
 from smtplib import SMTPException
 
-from PIL import Image
-from django.contrib.auth.models import User, Group
+from django.contrib.auth.models import Group, User
 from django.core.mail import send_mail
 from django.db.models import Avg, Q, QuerySet, Sum
 from django.http import BadHeaderError
@@ -14,6 +13,7 @@ from django.urls import reverse
 from django.utils import timezone
 from django_scopes import scopes_disabled
 from drf_writable_nested import UniqueFieldsMixin, WritableNestedModelSerializer
+from PIL import Image
 from rest_framework import serializers
 from rest_framework.exceptions import NotFound, ValidationError
 
@@ -22,14 +22,14 @@ from cookbook.helper.HelperFunctions import str2bool
 from cookbook.helper.permission_helper import above_space_limit
 from cookbook.helper.shopping_helper import RecipeShoppingEditor
 from cookbook.models import (Automation, BookmarkletImport, Comment, CookLog, CustomFilter,
-                             ExportLog, Food, FoodInheritField, ImportLog, Ingredient, Keyword,
-                             MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
+                             ExportLog, Food, FoodInheritField, ImportLog, Ingredient, InviteLink,
+                             Keyword, MealPlan, MealType, NutritionInformation, Recipe, RecipeBook,
                              RecipeBookEntry, RecipeImport, ShareLink, ShoppingList,
-                             ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
-                             SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
-                             UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
+                             ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
+                             Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
+                             SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
 from cookbook.templatetags.custom_tags import markdown
-from recipes.settings import MEDIA_URL, AWS_ENABLED
+from recipes.settings import AWS_ENABLED, MEDIA_URL
 
 
 class ExtendedRecipeMixin(serializers.ModelSerializer):
@@ -193,7 +193,8 @@ class SpaceSerializer(WritableNestedModelSerializer):
 
     class Meta:
         model = Space
-        fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
+        fields = ('id', 'name', 'created_by', 'created_at', 'message', 'max_recipes', 'max_file_storage_mb', 'max_users',
+                  'allow_sharing', 'demo', 'food_inherit', 'show_facet_count', 'user_count', 'recipe_count', 'file_size_mb',)
         read_only_fields = ('id', 'created_by', 'created_at', 'max_recipes', 'max_file_storage_mb', 'max_users', 'allow_sharing', 'demo',)
@@ -815,7 +816,7 @@ class RecipeBookEntrySerializer(serializers.ModelSerializer):
         book = validated_data['book']
         recipe = validated_data['recipe']
         if not book.get_owner() == self.context['request'].user and not self.context[
-            'request'].user in book.get_shared():
+                'request'].user in book.get_shared():
             raise NotFound(detail=None, code=None)
         obj, created = RecipeBookEntry.objects.get_or_create(book=book, recipe=recipe)
         return obj
@@ -871,11 +872,11 @@ class ShoppingListRecipeSerializer(serializers.ModelSerializer):
             value = value.quantize(
                 Decimal(1)) if value == value.to_integral() else value.normalize()  # strips trailing zero
         return (
-            obj.name
-            or getattr(obj.mealplan, 'title', None)
-            or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
-            or obj.recipe.name
-        ) + f' ({value:.2g})'
+                   obj.name
+                   or getattr(obj.mealplan, 'title', None)
+                   or (d := getattr(obj.mealplan, 'date', None)) and ': '.join([obj.mealplan.recipe.name, str(d)])
+                   or obj.recipe.name
+               ) + f' ({value:.2g})'
 
     def update(self, instance, validated_data):
         # TODO remove once old shopping list
@@ -1232,6 +1233,6 @@ class FoodShoppingUpdateSerializer(serializers.ModelSerializer):
 
 # non model serializers
 class RecipeFromSourceSerializer(serializers.Serializer):
-    url = serializers.CharField(max_length=4096, required=False, allow_null=True)
+    url = serializers.CharField(max_length=4096, required=False, allow_null=True, allow_blank=True)
     data = serializers.CharField(required=False, allow_null=True, allow_blank=True)
     bookmarklet = serializers.IntegerField(required=False, allow_null=True, )
diff --git a/cookbook/views/api.py b/cookbook/views/api.py
index 7a49261be..9b077b5c1 100644
--- a/cookbook/views/api.py
+++ b/cookbook/views/api.py
@@ -9,16 +9,14 @@ from zipfile import ZipFile
 
 import requests
 import validators
-from PIL import UnidentifiedImageError
 from annoying.decorators import ajax_request
 from annoying.functions import get_object_or_None
 from django.contrib import messages
-from django.contrib.auth.models import User, Group
+from django.contrib.auth.models import Group, User
 from django.contrib.postgres.search import TrigramSimilarity
 from django.core.exceptions import FieldError, ValidationError
 from django.core.files import File
-from django.db.models import (Case, Count, Exists, OuterRef, ProtectedError, Q,
-                              Subquery, Value, When)
+from django.db.models import Case, Count, Exists, OuterRef, ProtectedError, Q, Subquery, Value, When
 from django.db.models.fields.related import ForeignObjectRel
 from django.db.models.functions import Coalesce, Lower
 from django.http import FileResponse, HttpResponse, JsonResponse
@@ -27,6 +25,7 @@ from django.urls import reverse
 from django.utils.translation import gettext as _
 from django_scopes import scopes_disabled
 from icalendar import Calendar, Event
+from PIL import UnidentifiedImageError
 from requests.exceptions import MissingSchema
 from rest_framework import decorators, status, viewsets
 from rest_framework.authtoken.models import Token
@@ -45,39 +44,42 @@ from cookbook.helper.HelperFunctions import str2bool
 from cookbook.helper.image_processing import handle_image
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.helper.permission_helper import (CustomIsAdmin, CustomIsGuest, CustomIsOwner,
-                                               CustomIsShare, CustomIsShared, CustomIsUser,
-                                               group_required, CustomIsSpaceOwner, switch_user_active_space, is_space_owner, CustomIsOwnerReadOnly)
+                                               CustomIsOwnerReadOnly, CustomIsShare, CustomIsShared,
+                                               CustomIsSpaceOwner, CustomIsUser, group_required,
+                                               is_space_owner, switch_user_active_space)
 from cookbook.helper.recipe_html_import import get_recipe_from_source
 from cookbook.helper.recipe_search import RecipeFacet, RecipeSearch, old_search
 from cookbook.helper.recipe_url_import import get_from_youtube_scraper
 from cookbook.helper.shopping_helper import RecipeShoppingEditor, shopping_helper
 from cookbook.models import (Automation, BookmarkletImport, CookLog, CustomFilter, ExportLog, Food,
-                             FoodInheritField, ImportLog, Ingredient, Keyword, MealPlan, MealType,
-                             Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
-                             ShoppingListEntry, ShoppingListRecipe, Step, Storage, Supermarket,
-                             SupermarketCategory, SupermarketCategoryRelation, Sync, SyncLog, Unit,
-                             UserFile, UserPreference, ViewLog, Space, UserSpace, InviteLink)
+                             FoodInheritField, ImportLog, Ingredient, InviteLink, Keyword, MealPlan,
+                             MealType, Recipe, RecipeBook, RecipeBookEntry, ShareLink, ShoppingList,
+                             ShoppingListEntry, ShoppingListRecipe, Space, Step, Storage,
+                             Supermarket, SupermarketCategory, SupermarketCategoryRelation, Sync,
+                             SyncLog, Unit, UserFile, UserPreference, UserSpace, ViewLog)
 from cookbook.provider.dropbox import Dropbox
 from cookbook.provider.local import Local
 from cookbook.provider.nextcloud import Nextcloud
 from cookbook.schemas import FilterSchema, QueryParam, QueryParamAutoSchema, TreeSchema
-from cookbook.serializer import (AutomationSerializer, BookmarkletImportSerializer,
-                                 CookLogSerializer, CustomFilterSerializer, ExportLogSerializer,
+from cookbook.serializer import (AutomationSerializer, BookmarkletImportListSerializer,
+                                 BookmarkletImportSerializer, CookLogSerializer,
+                                 CustomFilterSerializer, ExportLogSerializer,
                                  FoodInheritFieldSerializer, FoodSerializer,
-                                 FoodShoppingUpdateSerializer, ImportLogSerializer,
-                                 IngredientSerializer, KeywordSerializer, MealPlanSerializer,
+                                 FoodShoppingUpdateSerializer, GroupSerializer, ImportLogSerializer,
+                                 IngredientSerializer, IngredientSimpleSerializer,
+                                 InviteLinkSerializer, KeywordSerializer, MealPlanSerializer,
                                  MealTypeSerializer, RecipeBookEntrySerializer,
-                                 RecipeBookSerializer, RecipeImageSerializer,
-                                 RecipeOverviewSerializer, RecipeSerializer,
+                                 RecipeBookSerializer, RecipeFromSourceSerializer,
+                                 RecipeImageSerializer, RecipeOverviewSerializer, RecipeSerializer,
                                  RecipeShoppingUpdateSerializer, RecipeSimpleSerializer,
                                  ShoppingListAutoSyncSerializer, ShoppingListEntrySerializer,
                                  ShoppingListRecipeSerializer, ShoppingListSerializer,
-                                 StepSerializer, StorageSerializer,
+                                 SpaceSerializer, StepSerializer, StorageSerializer,
                                  SupermarketCategoryRelationSerializer, SupermarketCategorySerializer,
                                  SupermarketSerializer, SyncLogSerializer, SyncSerializer,
                                  UnitSerializer, UserFileSerializer, UserNameSerializer, UserPreferenceSerializer,
-                                 ViewLogSerializer, IngredientSimpleSerializer, BookmarkletImportListSerializer, RecipeFromSourceSerializer, SpaceSerializer, UserSpaceSerializer, GroupSerializer, InviteLinkSerializer)
+                                 UserSpaceSerializer, ViewLogSerializer)
 from recipes import settings
@@ -713,7 +715,7 @@ class RecipeViewSet(viewsets.ModelViewSet):
             'Query string matched (fuzzy) against recipe name. In the future also fulltext search.')),
         QueryParam(name='keywords', description=_(
             'ID of keyword a recipe should have. For multiple repeat parameter. Equivalent to keywords_or'),
-            qtype='int'),
+                   qtype='int'),
         QueryParam(name='keywords_or',
                    description=_('Keyword IDs, repeat for multiple. Return recipes with any of the keywords'),
                    qtype='int'),
@@ -1118,25 +1120,22 @@ def recipe_from_source(request):
     """
     serializer = RecipeFromSourceSerializer(data=request.data)
     if serializer.is_valid():
-        try:
-            if bookmarklet := BookmarkletImport.objects.filter(pk=serializer.validated_data['bookmarklet']).first():
-                serializer.validated_data['url'] = bookmarklet.url
-                serializer.validated_data['data'] = bookmarklet.html
-                bookmarklet.delete()
-        except KeyError:
-            pass
-
         # headers to use for request to external sites
         external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}
 
-        if not 'url' in serializer.validated_data and not 'data' in serializer.validated_data:
+        if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
+            serializer.validated_data['url'] = bookmarklet.url
+            serializer.validated_data['data'] = bookmarklet.html
+            bookmarklet.delete()
+
+        elif not 'url' in serializer.validated_data and not 'data' in serializer.validated_data:
             return Response({
                 'error': True,
                 'msg': _('Nothing to do.')
             }, status=status.HTTP_400_BAD_REQUEST)
 
         # in manual mode request complete page to return it later
-        if 'url' in serializer.validated_data:
+        elif 'url' in serializer.validated_data and serializer.validated_data['url'] != '':
             if re.match('^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', serializer.validated_data['url']):
                 if validators.url(serializer.validated_data['url'], public=True):
                     return Response({
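
PATCH 1/4 rewires the bookmarklet path: the view now resolves the posted `bookmarklet` id first, copies the stored `url` and `html` from the `BookmarkletImport` row into the request payload, and deletes the row so each capture is consumed exactly once. The `allow_blank=True` added to the serializer's `url` field belongs to the same fix; the `!= ''` guard in the view suggests the bookmarklet can post an empty-string URL that the stricter field definition used to reject. A minimal client-side sketch of the flow, assuming the view is routed at `/api/recipe-from-source/` (the route path is an assumption here, not shown in the patch) and that authentication is already configured on the session:

    import requests

    BASE = "https://recipes.example.com"  # hypothetical instance URL

    def import_from_bookmarklet(session: requests.Session, bookmarklet_id: int) -> dict:
        # 'bookmarklet' wins over 'url'/'data': the view overwrites both
        # from the stored BookmarkletImport row, then deletes the row.
        resp = session.post(f"{BASE}/api/recipe-from-source/",
                            json={"bookmarklet": bookmarklet_id})
        resp.raise_for_status()
        return resp.json()["recipe_json"]
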
From 25a41bd293873febcb57febdd0fb378852cc7086 Mon Sep 17 00:00:00 2001
From: smilerz
Date: Thu, 7 Jul 2022 06:43:07 -0500
Subject: [PATCH 2/4] reverting scraper to just using wildmode

---
 cookbook/helper/recipe_html_import.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/cookbook/helper/recipe_html_import.py b/cookbook/helper/recipe_html_import.py
index 48dc4c119..c97629ef9 100644
--- a/cookbook/helper/recipe_html_import.py
+++ b/cookbook/helper/recipe_html_import.py
@@ -6,7 +6,7 @@ from urllib.parse import unquote
 from bs4 import BeautifulSoup
 from bs4.element import Tag
 from recipe_scrapers import scrape_html, scrape_me
-from recipe_scrapers._exceptions import NoSchemaFoundInWildMode, WebsiteNotImplementedError
+from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
 from recipe_scrapers._utils import get_host_name, normalize_string
 
 from cookbook.helper import recipe_url_import as helper
@@ -70,12 +70,9 @@ def get_recipe_from_source(text, url, request):
 
     if url and not text:
         try:
-            scrape = scrape_me(url_path=url)
-        except WebsiteNotImplementedError:
-            try:
-                scrape = scrape_me(url_path=url, wild_mode=True)
-            except(NoSchemaFoundInWildMode):
-                pass
+            scrape = scrape_me(url_path=url, wild_mode=True)
+        except(NoSchemaFoundInWildMode):
+            pass
     if not scrape:
         try:
             parse_list.append(remove_graph(json.loads(text)))
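
PATCH 2/4 drops the two-step `scrape_me` call introduced in the previous commit: wild mode already handles sites without a dedicated scraper class by parsing whatever schema.org/Recipe markup the page embeds, so the extra `WebsiteNotImplementedError` round-trip bought nothing. The resulting behaviour, as a standalone sketch using only the calls visible in the diff:

    from recipe_scrapers import scrape_me
    from recipe_scrapers._exceptions import NoSchemaFoundInWildMode

    def try_wild_scrape(url: str):
        # wild_mode=True parses any page carrying schema.org/Recipe markup,
        # whether or not recipe-scrapers ships a scraper for the site.
        try:
            return scrape_me(url_path=url, wild_mode=True)
        except NoSchemaFoundInWildMode:
            return None  # caller falls back to raw text / JSON parsing
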
From b1c0334947d6b6c8c0fd59efff70ebda3984729e Mon Sep 17 00:00:00 2001
From: smilerz
Date: Thu, 7 Jul 2022 07:50:57 -0500
Subject: [PATCH 3/4] quick hack to allow scraper to work correctly

---
 cookbook/helper/recipe_html_import.py | 2 ++
 cookbook/views/api.py                 | 7 +++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/cookbook/helper/recipe_html_import.py b/cookbook/helper/recipe_html_import.py
index c97629ef9..62a057e2f 100644
--- a/cookbook/helper/recipe_html_import.py
+++ b/cookbook/helper/recipe_html_import.py
@@ -73,6 +73,7 @@ def get_recipe_from_source(text, url, request):
             scrape = scrape_me(url_path=url, wild_mode=True)
         except(NoSchemaFoundInWildMode):
             pass
+
     if not scrape:
         try:
             parse_list.append(remove_graph(json.loads(text)))
@@ -101,6 +102,7 @@ def get_recipe_from_source(text, url, request):
 
     recipe_json = helper.get_from_scraper(scrape, request)
 
+    # TODO: DEPRECATE recipe_tree & html_data. first validate it isn't used anywhere
     for el in parse_list:
         temp_tree = []
         if isinstance(el, Tag):
diff --git a/cookbook/views/api.py b/cookbook/views/api.py
index 9b077b5c1..4325fe385 100644
--- a/cookbook/views/api.py
+++ b/cookbook/views/api.py
@@ -1120,7 +1120,7 @@ def recipe_from_source(request):
     """
     serializer = RecipeFromSourceSerializer(data=request.data)
     if serializer.is_valid():
-        # headers to use for request to external sites
+        # headers to use for request to external sites - DEPRECATE
        external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}
 
         if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
@@ -1144,9 +1144,11 @@ def recipe_from_source(request):
                         'recipe_html': '',
                         'recipe_images': [],
                     }, status=status.HTTP_200_OK)
+            #######
+            # this section is redundant to scrape_me. REFACTOR to catch errors from scrape_me
             try:
                 if validators.url(serializer.validated_data['url'], public=True):
-                    serializer.validated_data['data'] = requests.get(serializer.validated_data['url'], headers=external_request_headers).content
+                    requests.get(serializer.validated_data['url'], headers=external_request_headers).content
                 else:
                     return Response({
                         'error': True,
@@ -1162,6 +1164,7 @@ def recipe_from_source(request):
                 'error': True,
                 'msg': _('Bad URL Schema.')
             }, status=status.HTTP_400_BAD_REQUEST)
+            #######
 
             recipe_json, recipe_tree, recipe_html, recipe_images = get_recipe_from_source(serializer.validated_data['data'], serializer.validated_data['url'], request)
             if len(recipe_tree) == 0 and len(recipe_json) == 0:
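
PATCH 3/4 keeps the page pre-fetch in place but discards its result: the bare `requests.get(...).content` expression no longer overwrites `serializer.validated_data['data']`, so the scraper fetches the page itself, and the `#######` markers fence off the code the REFACTOR note wants folded into the `scrape_me` call. A sketch of what that consolidation could look like (PATCH 4/4 below implements essentially this inside the view); the error strings mirror the responses in the diff:

    import requests
    from recipe_scrapers import scrape_me
    from recipe_scrapers._exceptions import NoSchemaFoundInWildMode

    def scrape_with_errors(url: str):
        # Let scrape_me perform the HTTP request and surface transport
        # errors directly, instead of pre-fetching with requests.get().
        try:
            return scrape_me(url_path=url, wild_mode=True), None
        except NoSchemaFoundInWildMode:
            return None, 'No usable data could be found.'
        except requests.exceptions.ConnectionError:
            return None, 'Connection Refused.'
        except requests.exceptions.MissingSchema:
            return None, 'Bad URL Schema.'
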
From e40b73f420564dd927bd692f6d4df1055e30de07 Mon Sep 17 00:00:00 2001
From: smilerz
Date: Thu, 7 Jul 2022 15:09:22 -0500
Subject: [PATCH 4/4] deprecate get_recipe_from_source

---
 cookbook/helper/recipe_html_import.py  | 336 ++++++++++++-------------
 cookbook/helper/recipe_url_import.py   |  35 ++-
 cookbook/helper/scrapers/scrapers.py   |  10 +-
 cookbook/integration/cookbookapp.py    |   9 +-
 cookbook/integration/copymethat.py     |   3 +-
 cookbook/views/api.py                  |  95 ++++---
 vue/src/apps/ImportView/ImportView.vue |  12 +-
 7 files changed, 272 insertions(+), 228 deletions(-)

diff --git a/cookbook/helper/recipe_html_import.py b/cookbook/helper/recipe_html_import.py
index 62a057e2f..95f115b76 100644
--- a/cookbook/helper/recipe_html_import.py
+++ b/cookbook/helper/recipe_html_import.py
@@ -1,191 +1,191 @@
-import json
-import re
-from json import JSONDecodeError
-from urllib.parse import unquote
+# import json
+# import re
+# from json import JSONDecodeError
+# from urllib.parse import unquote
 
-from bs4 import BeautifulSoup
-from bs4.element import Tag
-from recipe_scrapers import scrape_html, scrape_me
-from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
-from recipe_scrapers._utils import get_host_name, normalize_string
+# from bs4 import BeautifulSoup
+# from bs4.element import Tag
+# from recipe_scrapers import scrape_html, scrape_me
+# from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
+# from recipe_scrapers._utils import get_host_name, normalize_string
 
-from cookbook.helper import recipe_url_import as helper
-from cookbook.helper.scrapers.scrapers import text_scraper
+# from cookbook.helper import recipe_url_import as helper
+# from cookbook.helper.scrapers.scrapers import text_scraper
 
 
-def get_recipe_from_source(text, url, request):
-    def build_node(k, v):
-        if isinstance(v, dict):
-            node = {
-                'name': k,
-                'value': k,
-                'children': get_children_dict(v)
-            }
-        elif isinstance(v, list):
-            node = {
-                'name': k,
-                'value': k,
-                'children': get_children_list(v)
-            }
-        else:
-            node = {
-                'name': k + ": " + normalize_string(str(v)),
-                'value': normalize_string(str(v))
-            }
-        return node
+# def get_recipe_from_source(text, url, request):
+#     def build_node(k, v):
+#         if isinstance(v, dict):
+#             node = {
+#                 'name': k,
+#                 'value': k,
+#                 'children': get_children_dict(v)
+#             }
+#         elif isinstance(v, list):
+#             node = {
+#                 'name': k,
+#                 'value': k,
+#                 'children': get_children_list(v)
+#             }
+#         else:
+#             node = {
+#                 'name': k + ": " + normalize_string(str(v)),
+#                 'value': normalize_string(str(v))
+#             }
+#         return node
 
-    def get_children_dict(children):
-        kid_list = []
-        for k, v in children.items():
-            kid_list.append(build_node(k, v))
-        return kid_list
+#     def get_children_dict(children):
+#         kid_list = []
+#         for k, v in children.items():
+#             kid_list.append(build_node(k, v))
+#         return kid_list
 
-    def get_children_list(children):
-        kid_list = []
-        for kid in children:
-            if type(kid) == list:
-                node = {
-                    'name': "unknown list",
-                    'value': "unknown list",
-                    'children': get_children_list(kid)
-                }
-                kid_list.append(node)
-            elif type(kid) == dict:
-                for k, v in kid.items():
-                    kid_list.append(build_node(k, v))
-            else:
-                kid_list.append({
-                    'name': normalize_string(str(kid)),
-                    'value': normalize_string(str(kid))
-                })
-        return kid_list
+#     def get_children_list(children):
+#         kid_list = []
+#         for kid in children:
+#             if type(kid) == list:
+#                 node = {
+#                     'name': "unknown list",
+#                     'value': "unknown list",
+#                     'children': get_children_list(kid)
+#                 }
+#                 kid_list.append(node)
+#             elif type(kid) == dict:
+#                 for k, v in kid.items():
+#                     kid_list.append(build_node(k, v))
+#             else:
+#                 kid_list.append({
+#                     'name': normalize_string(str(kid)),
+#                     'value': normalize_string(str(kid))
+#                 })
+#         return kid_list
 
-    recipe_tree = []
-    parse_list = []
-    soup = BeautifulSoup(text, "html.parser")
-    html_data = get_from_html(soup)
-    images = get_images_from_source(soup, url)
-    text = unquote(text)
-    scrape = None
+#     recipe_tree = []
+#     parse_list = []
+#     soup = BeautifulSoup(text, "html.parser")
+#     html_data = get_from_html(soup)
+#     images = get_images_from_source(soup, url)
+#     text = unquote(text)
+#     scrape = None
 
-    if url and not text:
-        try:
-            scrape = scrape_me(url_path=url, wild_mode=True)
-        except(NoSchemaFoundInWildMode):
-            pass
+#     if url and not text:
+#         try:
+#             scrape = scrape_me(url_path=url, wild_mode=True)
+#         except(NoSchemaFoundInWildMode):
+#             pass
 
-    if not scrape:
-        try:
-            parse_list.append(remove_graph(json.loads(text)))
-            if not url and 'url' in parse_list[0]:
-                url = parse_list[0]['url']
-            scrape = text_scraper("", url=url)
+#     if not scrape:
+#         try:
+#             parse_list.append(remove_graph(json.loads(text)))
+#             if not url and 'url' in parse_list[0]:
+#                 url = parse_list[0]['url']
+#             scrape = text_scraper("", url=url)
 
-        except JSONDecodeError:
-            for el in soup.find_all('script', type='application/ld+json'):
-                el = remove_graph(el)
-                if not url and 'url' in el:
-                    url = el['url']
-                if type(el) == list:
-                    for le in el:
-                        parse_list.append(le)
-                elif type(el) == dict:
-                    parse_list.append(el)
-            for el in soup.find_all(type='application/json'):
-                el = remove_graph(el)
-                if type(el) == list:
-                    for le in el:
-                        parse_list.append(le)
-                elif type(el) == dict:
-                    parse_list.append(el)
-            scrape = text_scraper(text, url=url)
+#         except JSONDecodeError:
+#             for el in soup.find_all('script', type='application/ld+json'):
+#                 el = remove_graph(el)
+#                 if not url and 'url' in el:
+#                     url = el['url']
+#                 if type(el) == list:
+#                     for le in el:
+#                         parse_list.append(le)
+#                 elif type(el) == dict:
+#                     parse_list.append(el)
+#             for el in soup.find_all(type='application/json'):
+#                 el = remove_graph(el)
+#                 if type(el) == list:
+#                     for le in el:
+#                         parse_list.append(le)
+#                 elif type(el) == dict:
+#                     parse_list.append(el)
+#             scrape = text_scraper(text, url=url)
 
-    recipe_json = helper.get_from_scraper(scrape, request)
+#     recipe_json = helper.get_from_scraper(scrape, request)
 
-    # TODO: DEPRECATE recipe_tree & html_data. first validate it isn't used anywhere
-    for el in parse_list:
-        temp_tree = []
-        if isinstance(el, Tag):
-            try:
-                el = json.loads(el.string)
-            except TypeError:
-                continue
+#     # TODO: DEPRECATE recipe_tree & html_data. first validate it isn't used anywhere
+#     for el in parse_list:
+#         temp_tree = []
+#         if isinstance(el, Tag):
+#             try:
+#                 el = json.loads(el.string)
+#             except TypeError:
+#                 continue
 
-        for k, v in el.items():
-            if isinstance(v, dict):
-                node = {
-                    'name': k,
-                    'value': k,
-                    'children': get_children_dict(v)
-                }
-            elif isinstance(v, list):
-                node = {
-                    'name': k,
-                    'value': k,
-                    'children': get_children_list(v)
-                }
-            else:
-                node = {
-                    'name': k + ": " + normalize_string(str(v)),
-                    'value': normalize_string(str(v))
-                }
-            temp_tree.append(node)
+#         for k, v in el.items():
+#             if isinstance(v, dict):
+#                 node = {
+#                     'name': k,
+#                     'value': k,
+#                     'children': get_children_dict(v)
+#                 }
+#             elif isinstance(v, list):
+#                 node = {
+#                     'name': k,
+#                     'value': k,
+#                     'children': get_children_list(v)
+#                 }
+#             else:
+#                 node = {
+#                     'name': k + ": " + normalize_string(str(v)),
+#                     'value': normalize_string(str(v))
+#                 }
+#             temp_tree.append(node)
 
-        if '@type' in el and el['@type'] == 'Recipe':
-            recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
-        else:
-            recipe_tree += [{'name': 'json', 'children': temp_tree}]
+#         if '@type' in el and el['@type'] == 'Recipe':
+#             recipe_tree += [{'name': 'ld+json', 'children': temp_tree}]
+#         else:
+#             recipe_tree += [{'name': 'json', 'children': temp_tree}]
 
-    return recipe_json, recipe_tree, html_data, images
+#     return recipe_json, recipe_tree, html_data, images
 
 
-def get_from_html(soup):
-    INVISIBLE_ELEMS = ('style', 'script', 'head', 'title')
-    html = []
-    for s in soup.strings:
-        if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)):
-            html.append(s)
-    return html
+# def get_from_html(soup):
+#     INVISIBLE_ELEMS = ('style', 'script', 'head', 'title')
+#     html = []
+#     for s in soup.strings:
+#         if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)):
+#             html.append(s)
+#     return html
 
 
-def get_images_from_source(soup, url):
-    sources = ['src', 'srcset', 'data-src']
-    images = []
-    img_tags = soup.find_all('img')
-    if url:
-        site = get_host_name(url)
-        prot = url.split(':')[0]
+# def get_images_from_source(soup, url):
+#     sources = ['src', 'srcset', 'data-src']
+#     images = []
+#     img_tags = soup.find_all('img')
+#     if url:
+#         site = get_host_name(url)
+#         prot = url.split(':')[0]
 
-    urls = []
-    for img in img_tags:
-        for src in sources:
-            try:
-                urls.append(img[src])
-            except KeyError:
-                pass
+#     urls = []
+#     for img in img_tags:
+#         for src in sources:
+#             try:
+#                 urls.append(img[src])
+#             except KeyError:
+#                 pass
 
-    for u in urls:
-        u = u.split('?')[0]
-        filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
-        if filename:
-            if (('http' not in u) and (url)):
-                # sometimes an image source can be relative
-                # if it is provide the base url
-                u = '{}://{}{}'.format(prot, site, u)
-            if 'http' in u:
-                images.append(u)
-    return images
+#     for u in urls:
+#         u = u.split('?')[0]
+#         filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
+#         if filename:
+#             if (('http' not in u) and (url)):
+#                 # sometimes an image source can be relative
+#                 # if it is provide the base url
+#                 u = '{}://{}{}'.format(prot, site, u)
+#             if 'http' in u:
+#                 images.append(u)
+#     return images
 
 
-def remove_graph(el):
-    # recipes type might be wrapped in @graph type
-    if isinstance(el, Tag):
-        try:
-            el = json.loads(el.string)
-            if '@graph' in el:
-                for x in el['@graph']:
-                    if '@type' in x and x['@type'] == 'Recipe':
-                        el = x
-        except (TypeError, JSONDecodeError):
-            pass
-    return el
+# def remove_graph(el):
+#     # recipes type might be wrapped in @graph type
+#     if isinstance(el, Tag):
+#         try:
+#             el = json.loads(el.string)
+#             if '@graph' in el:
+#                 for x in el['@graph']:
+#                     if '@type' in x and x['@type'] == 'Recipe':
+#                         el = x
+#         except (TypeError, JSONDecodeError):
+#             pass
+#     return el
diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py
index aa3cc5cff..cec57e729 100644
--- a/cookbook/helper/recipe_url_import.py
+++ b/cookbook/helper/recipe_url_import.py
@@ -1,21 +1,19 @@
 import random
 import re
 from html import unescape
-
-from pytube import YouTube
 from unicodedata import decomposition
 
 from django.utils.dateparse import parse_duration
 from django.utils.translation import gettext as _
 from isodate import parse_duration as iso_parse_duration
 from isodate.isoerror import ISO8601Error
-from recipe_scrapers._utils import get_minutes
+from pytube import YouTube
+from recipe_scrapers._utils import get_host_name, get_minutes
 
 from cookbook.helper import recipe_url_import as helper
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.models import Keyword
 
-
 # from recipe_scrapers._utils import get_minutes ## temporary until/unless upstream incorporates get_minutes() PR
@@ -369,3 +367,32 @@ def iso_duration_to_minutes(string):
                       string
                       ).groupdict()
     return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)
+
+
+def get_images_from_soup(soup, url):
+    sources = ['src', 'srcset', 'data-src']
+    images = []
+    img_tags = soup.find_all('img')
+    if url:
+        site = get_host_name(url)
+        prot = url.split(':')[0]
+
+    urls = []
+    for img in img_tags:
+        for src in sources:
+            try:
+                urls.append(img[src])
+            except KeyError:
+                pass
+
+    for u in urls:
+        u = u.split('?')[0]
+        filename = re.search(r'/([\w_-]+[.](jpg|jpeg|gif|png))$', u)
+        if filename:
+            if (('http' not in u) and (url)):
+                # sometimes an image source can be relative
+                # if it is provide the base url
+                u = '{}://{}{}'.format(prot, site, u)
+            if 'http' in u:
+                images.append(u)
+    return images
diff --git a/cookbook/helper/scrapers/scrapers.py b/cookbook/helper/scrapers/scrapers.py
index 94e3daea0..7d6c08b15 100644
--- a/cookbook/helper/scrapers/scrapers.py
+++ b/cookbook/helper/scrapers/scrapers.py
@@ -27,17 +27,17 @@ def text_scraper(text, url=None):
     class TextScraper(scraper_class):
         def __init__(
             self,
-            page_data,
-            url=None
+            html=None,
+            url=None,
         ):
             self.wild_mode = False
             self.meta_http_equiv = False
-            self.soup = BeautifulSoup(page_data, "html.parser")
+            self.soup = BeautifulSoup(html, "html.parser")
             self.url = url
             self.recipe = None
             try:
-                self.schema = SchemaOrg(page_data)
+                self.schema = SchemaOrg(html)
             except (JSONDecodeError, AttributeError):
                 pass
 
-    return TextScraper(text, url)
+    return TextScraper(url=url, html=text)
diff --git a/cookbook/integration/cookbookapp.py b/cookbook/integration/cookbookapp.py
index f22e9d45d..7ff50ab62 100644
--- a/cookbook/integration/cookbookapp.py
+++ b/cookbook/integration/cookbookapp.py
@@ -10,8 +10,8 @@ import validators
 import yaml
 
 from cookbook.helper.ingredient_parser import IngredientParser
-from cookbook.helper.recipe_html_import import get_recipe_from_source
-from cookbook.helper.recipe_url_import import iso_duration_to_minutes
+from cookbook.helper.recipe_url_import import get_images_from_soup, iso_duration_to_minutes
+from cookbook.helper.scrapers.scrapers import text_scraper
 from cookbook.integration.integration import Integration
 from cookbook.models import Ingredient, Keyword, Recipe, Step
 
@@ -24,7 +24,10 @@ class CookBookApp(Integration):
 
     def get_recipe_from_file(self, file):
         recipe_html = file.getvalue().decode("utf-8")
-        recipe_json, recipe_tree, html_data, images = get_recipe_from_source(recipe_html, 'CookBookApp', self.request)
+        # recipe_json, recipe_tree, html_data, images = get_recipe_from_source(recipe_html, 'CookBookApp', self.request)
+        scrape = text_scraper(text=data)
+        recipe_json = helper.get_from_scraper(scrape, request)
+        images = list(dict.fromkeys(get_images_from_soup(scrape.soup, url)))
 
         recipe = Recipe.objects.create(
             name=recipe_json['name'].strip(),
diff --git a/cookbook/integration/copymethat.py b/cookbook/integration/copymethat.py
index 7a2a532f9..2a9c56521 100644
--- a/cookbook/integration/copymethat.py
+++ b/cookbook/integration/copymethat.py
@@ -3,10 +3,9 @@ from io import BytesIO
 from zipfile import ZipFile
 
 from bs4 import BeautifulSoup
-
 from django.utils.translation import gettext as _
+
 from cookbook.helper.ingredient_parser import IngredientParser
-from cookbook.helper.recipe_html_import import get_recipe_from_source
 from cookbook.helper.recipe_url_import import iso_duration_to_minutes, parse_servings
 from cookbook.integration.integration import Integration
 from cookbook.models import Ingredient, Keyword, Recipe, Step
diff --git a/cookbook/views/api.py b/cookbook/views/api.py
index 4325fe385..54df51bfb 100644
--- a/cookbook/views/api.py
+++ b/cookbook/views/api.py
@@ -5,6 +5,8 @@ import re
 import traceback
 import uuid
 from collections import OrderedDict
+from json import JSONDecodeError
+from urllib.parse import unquote
 from zipfile import ZipFile
 
 import requests
@@ -26,6 +28,8 @@ from django.utils.translation import gettext as _
 from django_scopes import scopes_disabled
 from icalendar import Calendar, Event
 from PIL import UnidentifiedImageError
+from recipe_scrapers import scrape_html, scrape_me
+from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
 from requests.exceptions import MissingSchema
 from rest_framework import decorators, status, viewsets
 from rest_framework.authtoken.models import Token
@@ -40,6 +44,7 @@ from rest_framework.throttling import AnonRateThrottle
 from rest_framework.viewsets import ViewSetMixin
 from treebeard.exceptions import InvalidMoveToDescendant, InvalidPosition, PathOverflow
 
+from cookbook.helper import recipe_url_import as helper
 from cookbook.helper.HelperFunctions import str2bool
 from cookbook.helper.image_processing import handle_image
 from cookbook.helper.ingredient_parser import IngredientParser
@@ -47,9 +52,9 @@ from cookbook.helper.permission_helper import (CustomIsAdmin, CustomIsGuest, CustomIsOwner,
                                                CustomIsOwnerReadOnly, CustomIsShare, CustomIsShared,
                                                CustomIsSpaceOwner, CustomIsUser, group_required,
                                                is_space_owner, switch_user_active_space)
-from cookbook.helper.recipe_html_import import get_recipe_from_source
 from cookbook.helper.recipe_search import RecipeFacet, RecipeSearch, old_search
-from cookbook.helper.recipe_url_import import get_from_youtube_scraper
+from cookbook.helper.recipe_url_import import get_from_youtube_scraper, get_images_from_soup
+from cookbook.helper.scrapers.scrapers import text_scraper
 from cookbook.helper.shopping_helper import RecipeShoppingEditor, shopping_helper
 from cookbook.models import (Automation, BookmarkletImport, CookLog, CustomFilter, ExportLog, Food,
                              FoodInheritField, ImportLog, Ingredient, InviteLink, Keyword, MealPlan,
@@ -1116,69 +1121,79 @@ def recipe_from_source(request):
     - url: url to use for importing recipe
     - data: if no url is given recipe is imported from provided source data
     - (optional) bookmarklet: id of bookmarklet import to use, overrides URL and data attributes
-    :return: JsonResponse containing the parsed json, original html,json and images
+    :return: JsonResponse containing the parsed json and images
     """
+    scrape = None
     serializer = RecipeFromSourceSerializer(data=request.data)
     if serializer.is_valid():
-        # headers to use for request to external sites - DEPRECATE
-        external_request_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"}
-
         if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (bookmarklet := BookmarkletImport.objects.filter(pk=b_pk).first()):
             serializer.validated_data['url'] = bookmarklet.url
             serializer.validated_data['data'] = bookmarklet.html
             bookmarklet.delete()
 
-        elif not 'url' in serializer.validated_data and not 'data' in serializer.validated_data:
+        url = serializer.validated_data.get('url', None)
+        data = unquote(serializer.validated_data.get('data', None))
+        if not url and not data:
             return Response({
                 'error': True,
                 'msg': _('Nothing to do.')
             }, status=status.HTTP_400_BAD_REQUEST)
 
-        # in manual mode request complete page to return it later
-        elif 'url' in serializer.validated_data and serializer.validated_data['url'] != '':
-            if re.match('^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', serializer.validated_data['url']):
-                if validators.url(serializer.validated_data['url'], public=True):
+        elif url and not data:
+            if re.match('^(https?://)?(www\.youtube\.com|youtu\.be)/.+$', url):
+                if validators.url(url, public=True):
                     return Response({
-                        'recipe_json': get_from_youtube_scraper(serializer.validated_data['url'], request),
-                        'recipe_tree': '',
-                        'recipe_html': '',
+                        'recipe_json': get_from_youtube_scraper(url, request),
+                        # 'recipe_tree': '',
+                        # 'recipe_html': '',
                         'recipe_images': [],
                     }, status=status.HTTP_200_OK)
-            #######
-            # this section is redundant to scrape_me. REFACTOR to catch errors from scrape_me
-            try:
-                if validators.url(serializer.validated_data['url'], public=True):
-                    requests.get(serializer.validated_data['url'], headers=external_request_headers).content
-                else:
+            else:
+                try:
+                    if validators.url(url, public=True):
+                        scrape = scrape_me(url_path=url, wild_mode=True)
+
+                    else:
+                        return Response({
+                            'error': True,
+                            'msg': _('Invalid Url')
+                        }, status=status.HTTP_400_BAD_REQUEST)
+                except NoSchemaFoundInWildMode:
+                    pass
+                except requests.exceptions.ConnectionError:
                     return Response({
                         'error': True,
-                        'msg': _('Invalid Url')
+                        'msg': _('Connection Refused.')
                     }, status=status.HTTP_400_BAD_REQUEST)
-            except requests.exceptions.ConnectionError:
-                return Response({
-                    'error': True,
-                    'msg': _('Connection Refused.')
-                }, status=status.HTTP_400_BAD_REQUEST)
-            except requests.exceptions.MissingSchema:
-                return Response({
-                    'error': True,
-                    'msg': _('Bad URL Schema.')
-                }, status=status.HTTP_400_BAD_REQUEST)
-            #######
+                except requests.exceptions.MissingSchema:
+                    return Response({
+                        'error': True,
+                        'msg': _('Bad URL Schema.')
+                    }, status=status.HTTP_400_BAD_REQUEST)
+        else:
+            try:
+                json.loads(data)
+                data = ""
+            except JSONDecodeError:
+                pass
+            scrape = text_scraper(text=data, url=url)
+            if not url and (found_url := scrape.schema.data.get('url', None)):
+                scrape = text_scraper(text=data, url=found_url)
 
-            recipe_json, recipe_tree, recipe_html, recipe_images = get_recipe_from_source(serializer.validated_data['data'], serializer.validated_data['url'], request)
-            if len(recipe_tree) == 0 and len(recipe_json) == 0:
+        if scrape:
+            return Response({
+                'recipe_json': helper.get_from_scraper(scrape, request),
+                # 'recipe_tree': recipe_tree,
+                # 'recipe_html': recipe_html,
+                'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, url))),
+            }, status=status.HTTP_200_OK)
+
+        else:
             return Response({
                 'error': True,
                 'msg': _('No usable data could be found.')
             }, status=status.HTTP_400_BAD_REQUEST)
-        else:
-            return Response({
-                'recipe_json': recipe_json,
-                'recipe_tree': recipe_tree,
-                'recipe_html': recipe_html,
-                'recipe_images': list(dict.fromkeys(recipe_images)),
-            }, status=status.HTTP_200_OK)
     else:
         return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
diff --git a/vue/src/apps/ImportView/ImportView.vue b/vue/src/apps/ImportView/ImportView.vue
index 407664929..4b2ed0ffd 100644
--- a/vue/src/apps/ImportView/ImportView.vue
+++ b/vue/src/apps/ImportView/ImportView.vue
@@ -461,8 +461,8 @@ export default {
             recent_urls: [],
             source_data: '',
             recipe_json: undefined,
-            recipe_html: undefined,
-            recipe_tree: undefined,
+            // recipe_html: undefined,
+            // recipe_tree: undefined,
             recipe_images: [],
             imported_recipes: [],
             failed_imports: [],
@@ -593,9 +593,9 @@ export default {
             }
 
             // reset all variables
-            this.recipe_html = undefined
+            // this.recipe_html = undefined
             this.recipe_json = undefined
-            this.recipe_tree = undefined
+            // this.recipe_tree = undefined
             this.recipe_images = []
 
             // load recipe
@@ -621,8 +621,8 @@ export default {
                         return x
                     })
 
-                    this.recipe_tree = response.data['recipe_tree'];
-                    this.recipe_html = response.data['recipe_html'];
+                    // this.recipe_tree = response.data['recipe_tree'];
+                    // this.recipe_html = response.data['recipe_html'];
                    this.recipe_images = response.data['recipe_images'] !== undefined ? response.data['recipe_images'] : [];
 
                    if (!silent) {
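
PATCH 4/4 retires `get_recipe_from_source` entirely: the helper is commented out, the view builds a scraper via `text_scraper` and converts it with `helper.get_from_scraper`, image extraction moves into recipe_url_import.py as `get_images_from_soup`, and the API response drops the deprecated `recipe_tree`/`recipe_html` fields (mirrored by the commented-out bindings in ImportView.vue). The CookBookApp hunk sketches the same pipeline but still refers to `data`, `request` and `url`, names not bound in that method; a self-contained sketch of the intended call sequence with those bindings made explicit, under the assumption that it runs where a request object and the file's HTML string are available:

    from cookbook.helper import recipe_url_import as helper
    from cookbook.helper.recipe_url_import import get_images_from_soup
    from cookbook.helper.scrapers.scrapers import text_scraper

    def parse_recipe_html(html, request, url=None):
        # text_scraper builds a BeautifulSoup tree and a SchemaOrg parser
        # from the raw HTML; get_from_scraper turns that into recipe JSON.
        scrape = text_scraper(text=html, url=url)
        recipe_json = helper.get_from_scraper(scrape, request)
        # dict.fromkeys() de-duplicates image URLs while preserving order.
        images = list(dict.fromkeys(get_images_from_soup(scrape.soup, url)))
        return recipe_json, images
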