mirror of https://github.com/TandoorRecipes/recipes.git (synced 2026-01-03 21:37:49 -05:00)
@@ -1,68 +0,0 @@
-import json
-from recipe_scrapers._abstract import AbstractScraper
-
-
-class CooksIllustrated(AbstractScraper):
-    @classmethod
-    def host(cls, site='cooksillustrated'):
-        return {
-            'cooksillustrated': f"{site}.com",
-            'americastestkitchen': f"{site}.com",
-            'cookscountry': f"{site}.com",
-        }.get(site)
-
-    def title(self):
-        return self.schema.title()
-
-    def image(self):
-        return self.schema.image()
-
-    def total_time(self):
-        if not self.recipe:
-            self.get_recipe()
-        return self.recipe['recipeTimeNote']
-
-    def yields(self):
-        if not self.recipe:
-            self.get_recipe()
-        return self.recipe['yields']
-
-    def ingredients(self):
-        if not self.recipe:
-            self.get_recipe()
-        ingredients = []
-        for group in self.recipe['ingredientGroups']:
-            ingredients += group['fields']['recipeIngredientItems']
-        return [
-            "{} {} {}{}".format(
-                i['fields']['qty'] or '',
-                i['fields']['measurement'] or '',
-                i['fields']['ingredient']['fields']['title'] or '',
-                i['fields']['postText'] or ''
-            )
-            for i in ingredients
-        ]
-
-    def instructions(self):
-        if not self.recipe:
-            self.get_recipe()
-        if self.recipe.get('headnote', False):
-            i = ['Note: ' + self.recipe.get('headnote', '')]
-        else:
-            i = []
-        return "\n".join(
-            i
-            + [self.recipe.get('whyThisWorks', '')]
-            + [
-                instruction['fields']['content']
-                for instruction in self.recipe['instructions']
-            ]
-        )
-
-    def nutrients(self):
-        raise NotImplementedError("This should be implemented.")
-
-    def get_recipe(self):
-        j = json.loads(self.soup.find(type='application/json').string)
-        name = list(j['props']['initialState']['content']['documents'])[0]
-        self.recipe = j['props']['initialState']['content']['documents'][name]
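For orientation: this deleted custom scraper never reads schema.org data for the recipe body. Its get_recipe() loads the Next.js state blob that these sites embed in a <script type="application/json"> tag and takes the first entry under documents. A minimal sketch of that lookup against a hypothetical inline page (the HTML and key names here are illustrative, modeled on the code above):

import json

from bs4 import BeautifulSoup

# Hypothetical page fragment mimicking the embedded Next.js state.
html = (
    '<script type="application/json">'
    '{"props": {"initialState": {"content": {"documents": {'
    '"recipe_123": {"yields": "Serves 4", "recipeTimeNote": "45 minutes"}'
    '}}}}}'
    '</script>'
)

soup = BeautifulSoup(html, "html.parser")
blob = json.loads(soup.find(type="application/json").string)
documents = blob["props"]["initialState"]["content"]["documents"]
recipe = documents[list(documents)[0]]  # take the first (and only) document
print(recipe["yields"], "/", recipe["recipeTimeNote"])  # Serves 4 / 45 minutes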
@@ -1,43 +0,0 @@
-from json import JSONDecodeError
-
-from bs4 import BeautifulSoup
-from recipe_scrapers import SCRAPERS, get_host_name
-from recipe_scrapers._factory import SchemaScraperFactory
-from recipe_scrapers._schemaorg import SchemaOrg
-
-from .cooksillustrated import CooksIllustrated
-
-CUSTOM_SCRAPERS = {
-    CooksIllustrated.host(site="cooksillustrated"): CooksIllustrated,
-    CooksIllustrated.host(site="americastestkitchen"): CooksIllustrated,
-    CooksIllustrated.host(site="cookscountry"): CooksIllustrated,
-}
-SCRAPERS.update(CUSTOM_SCRAPERS)
-
-
-def text_scraper(text, url=None):
-    domain = None
-    if url:
-        domain = get_host_name(url)
-    if domain in SCRAPERS:
-        scraper_class = SCRAPERS[domain]
-    else:
-        scraper_class = SchemaScraperFactory.SchemaScraper
-
-    class TextScraper(scraper_class):
-        def __init__(
-            self,
-            html=None,
-            url=None,
-        ):
-            self.supported_only = False
-            self.meta_http_equiv = False
-            self.soup = BeautifulSoup(html, "html.parser")
-            self.url = url
-            self.recipe = None
-            try:
-                self.schema = SchemaOrg(html)
-            except (JSONDecodeError, AttributeError):
-                pass
-
-    return TextScraper(url=url, html=text)
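This deleted factory built a throwaway TextScraper subclass: a site-specific scraper (via the SCRAPERS registry, including the custom CooksIllustrated entries above) when the URL's host was known, otherwise the generic schema.org scraper. That is what let Tandoor parse already-downloaded HTML without a network round trip. A hypothetical usage sketch, assuming this module's imports and that SchemaOrg picks up the inline JSON-LD:

# Hypothetical snippet of pre-fetched HTML carrying schema.org JSON-LD.
html = (
    '<script type="application/ld+json">'
    '{"@context": "https://schema.org", "@type": "Recipe", "name": "Pancakes"}'
    '</script>'
)

# No registered host for example.org, so TextScraper subclasses the
# generic SchemaScraperFactory.SchemaScraper and parses schema.org only.
scraper = text_scraper(text=html, url="https://example.org/pancakes")
print(scraper.schema.data.get("name"))  # should print "Pancakes"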
@@ -7,7 +7,7 @@ import validators
 from cookbook.helper.ingredient_parser import IngredientParser
 from cookbook.helper.recipe_url_import import (get_from_scraper, get_images_from_soup,
                                                iso_duration_to_minutes)
-from cookbook.helper.scrapers.scrapers import text_scraper
+from recipe_scrapers import scrape_html
 from cookbook.integration.integration import Integration
 from cookbook.models import Ingredient, Recipe, Step
@@ -20,7 +20,7 @@ class CookBookApp(Integration):
     def get_recipe_from_file(self, file):
         recipe_html = file.getvalue().decode("utf-8")

-        scrape = text_scraper(text=recipe_html)
+        scrape = scrape_html(html=recipe_html, org_url="https://cookbookapp.import", supported_only=False)
         recipe_json = get_from_scraper(scrape, self.request)
         images = list(dict.fromkeys(get_images_from_soup(scrape.soup, None)))

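This is the pattern the rest of the changeset follows: recipe_scrapers.scrape_html() replaces the deleted text_scraper factory. It still parses offline HTML, but it expects an origin URL, hence the placeholder https://cookbookapp.import, and supported_only=False keeps the old fallback to generic schema.org parsing for unknown hosts. A sketch of the call on its own, with a hypothetical file name standing in for the exported CookBookApp file:

from recipe_scrapers import scrape_html

# "recipe.html" is a hypothetical stand-in for the uploaded export file.
with open("recipe.html", encoding="utf-8") as f:
    scrape = scrape_html(html=f.read(), org_url="https://cookbookapp.import",
                         supported_only=False)
print(scrape.title())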
@@ -51,7 +51,7 @@ def test_list_space(obj_1, obj_2, u1_s1, u1_s2, space_2):
     ['g1_s2', 403],
     ['u1_s2', 404],
     ['a1_s2', 404],
-])
+], ids=str)
 def test_update(arg, request, obj_1):
     c = request.getfixturevalue(arg[0])
     r = c.patch(
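The ids=str additions in this and the following hunks are cosmetic but useful: pytest derives each parametrized test's id from str(param) instead of an opaque arg0, arg1, ... counter, so failures name the actual case. A self-contained sketch of the effect:

import pytest

@pytest.mark.parametrize("arg", [['u1_s2', 404], ['a1_s2', 404]], ids=str)
def test_ids_demo(arg):
    # Collected as test_ids_demo[['u1_s2', 404]] and test_ids_demo[['a1_s2', 404]]
    # instead of test_ids_demo[arg0] and test_ids_demo[arg1].
    assert arg[1] == 404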
@@ -273,12 +273,12 @@ def test_search_units(found_recipe, recipes, u1_s1, space_1):
     ('fuzzy_lookups', True), ('fuzzy_lookups', False)
     ],
     [('unaccent', True), ('unaccent', False)]
-), indirect=['user1'])
+), indirect=['user1'], ids=str)
 @pytest.mark.parametrize("found_recipe, param_type", [
     ({'unit': True}, 'unit'),
     ({'keyword': True}, 'keyword'),
     ({'food': True}, 'food'),
-], indirect=['found_recipe'])
+], indirect=['found_recipe'], ids=str)
 def test_fuzzy_lookup(found_recipe, recipes, param_type, user1, space_1):
     with scope(space=space_1):
         list_url = f'api:{param_type}-list'
@@ -306,14 +306,14 @@ def test_fuzzy_lookup(found_recipe, recipes, param_type, user1, space_1):
     ('istartswith', True), ('istartswith', False),
     ],
     [('unaccent', True), ('unaccent', False)]
-), indirect=['user1'])
+), indirect=['user1'], ids=str)
 @pytest.mark.parametrize("found_recipe", [
     ({'name': True}),
     ({'description': True}),
     ({'instruction': True}),
     ({'keyword': True}),
     ({'food': True}),
-], indirect=['found_recipe'])
+], indirect=['found_recipe'], ids=str)
 # user array contains: user client, expected count of search, expected count of misspelled search, search string, misspelled search string, user search preferences
 def test_search_string(found_recipe, recipes, user1, space_1):
     with scope(space=space_1):
@@ -19,6 +19,23 @@ DATA_DIR = "cookbook/tests/other/test_data/"
 # plus the test that previously existed
 # plus the custom scraper that was created
 # plus any specific defects discovered along the way
+RECIPES = [
+    ALLRECIPES,
+    AMERICAS_TEST_KITCHEN,
+    CHEF_KOCH,
+    CHEF_KOCH2,  # test for empty ingredient in ingredient_parser
+    COOKPAD,
+    COOKS_COUNTRY,
+    DELISH,
+    FOOD_NETWORK,
+    GIALLOZAFFERANO,
+    JOURNAL_DES_FEMMES,
+    MADAME_DESSERT,  # example of json only source
+    MARMITON,
+    TASTE_OF_HOME,
+    THE_SPRUCE_EATS,  # example of non-json recipes_scraper
+    TUDOGOSTOSO,
+]


 @pytest.mark.parametrize("arg", [
@@ -32,29 +49,7 @@ def test_import_permission(arg, request):
     assert c.get(reverse(IMPORT_SOURCE_URL)).status_code == arg[1]


-@pytest.mark.parametrize("arg", [
-    ALLRECIPES,
-    # test of custom scraper ATK
-    AMERICAS_TEST_KITCHEN,
-    CHEF_KOCH,
-    # test for empty ingredient in ingredient_parser
-    CHEF_KOCH2,
-    COOKPAD,
-    # test of custom scraper ATK
-    COOKS_COUNTRY,
-    DELISH,
-    FOOD_NETWORK,
-    GIALLOZAFFERANO,
-    JOURNAL_DES_FEMMES,
-    # example of recipes_scraper in with wildmode
-    # example of json only source
-    MADAME_DESSERT,
-    MARMITON,
-    TASTE_OF_HOME,
-    # example of non-json recipes_scraper
-    THE_SPRUCE_EATS,  # TODO seems to be broken in recipe scrapers
-    TUDOGOSTOSO,
-])
+@pytest.mark.parametrize("arg", RECIPES, ids=[x['file'][0] for x in RECIPES])
 def test_recipe_import(arg, u1_s1):
     url = arg['url']
     for f in list(arg['file']):  # url and files get popped later
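The duplicated inline list is gone: RECIPES (defined in the previous hunk) is reused, and ids=[x['file'][0] for x in RECIPES] names each test case after the entry's first test-data file. A sketch of the shape this assumes, with hypothetical entries since the real constants live in the test-data module; the dict keys match the arg['url'] and arg['file'] accesses in the test body above:

# Each RECIPES entry is a dict with at least 'url' and 'file' keys
# (values here are hypothetical).
RECIPES = [
    {"url": "https://www.allrecipes.com/recipe/1/", "file": ["allrecipes.html"]},
    {"url": "https://www.delish.com/recipe/2/", "file": ["delish.html"]},
]
print([x["file"][0] for x in RECIPES])  # ['allrecipes.html', 'delish.html']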
@@ -63,7 +63,6 @@ from cookbook.helper.permission_helper import (
 )
 from cookbook.helper.recipe_search import RecipeSearch
 from cookbook.helper.recipe_url_import import clean_dict, get_from_youtube_scraper, get_images_from_soup
-from cookbook.helper.scrapers.scrapers import text_scraper
 from cookbook.helper.shopping_helper import RecipeShoppingEditor, shopping_helper
 from cookbook.models import (Automation, BookmarkletImport, CookLog, CustomFilter, ExportLog, Food,
                              FoodInheritField, FoodProperty, ImportLog, Ingredient, InviteLink,
@@ -1437,7 +1436,10 @@ class RecipeUrlImportView(APIView):
         else:
             try:
                 if validators.url(url, public=True):
-                    html = requests.get(url).content
+                    html = requests.get(
+                        url,
+                        headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0"}
+                    ).content
                     scrape = scrape_html(org_url=url, html=html, supported_only=False)
                 else:
                     return Response({'error': True, 'msg': _('Invalid Url')}, status=status.HTTP_400_BAD_REQUEST)
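The explicit User-Agent is presumably there because some recipe sites reject the default python-requests/x.y identifier; the request now presents itself as a desktop Firefox build. An equivalent standalone sketch, with a hypothetical URL:

import requests

UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) "
      "Gecko/20100101 Firefox/86.0")
html = requests.get("https://example.org/recipe", headers={"User-Agent": UA}).content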
@@ -1457,9 +1459,9 @@
                 data = "<script type='application/ld+json'>" + json.dumps(data_json) + "</script>"
             except JSONDecodeError:
                 pass
-            scrape = text_scraper(text=data, url=url)
-            if not url and (found_url := scrape.schema.data.get('url', None)):
-                scrape = text_scraper(text=data, url=found_url)
+            scrape = scrape_html(html=data, org_url=url, supported_only=False)
+            if not url and (found_url := scrape.schema.data.get('url', 'https://urlnotfound.none')):
+                scrape = scrape_html(html=data, org_url=found_url, supported_only=False)

         if scrape:
             return Response({
@@ -30,7 +30,7 @@ Jinja2==3.1.4
 django-webpack-loader==3.0.1
 git+https://github.com/BITSOLVER/django-js-reverse@071e304fd600107bc64bbde6f2491f1fe049ec82
 django-allauth==0.61.1
-recipe-scrapers==15.0.0-rc3
+recipe-scrapers==15.0.0
 django-scopes==2.0.0
 django-treebeard==4.7
 django-cors-headers==4.3.1