Merge branch 'develop' of https://github.com/TandoorRecipes/recipes into develop
@@ -9,6 +9,8 @@ from recipe_scrapers._utils import get_host_name, normalize_string
 from cookbook.helper import recipe_url_import as helper
 from cookbook.helper.scrapers.scrapers import text_scraper
+from recipe_scrapers import scrape_me
+from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
 
 
 def get_recipe_from_source(text, url, request):
@@ -63,34 +65,41 @@ def get_recipe_from_source(text, url, request):
     html_data = []
     images = []
     text = unquote(text)
+    scrape = None
 
-    try:
-        parse_list.append(remove_graph(json.loads(text)))
-        if not url and 'url' in parse_list[0]:
-            url = parse_list[0]['url']
-        scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
-
-    except JSONDecodeError:
-        soup = BeautifulSoup(text, "html.parser")
-        html_data = get_from_html(soup)
-        images += get_images_from_source(soup, url)
-        for el in soup.find_all('script', type='application/ld+json'):
-            el = remove_graph(el)
-            if not url and 'url' in el:
-                url = el['url']
-            if type(el) == list:
-                for le in el:
-                    parse_list.append(le)
-            elif type(el) == dict:
-                parse_list.append(el)
-        for el in soup.find_all(type='application/json'):
-            el = remove_graph(el)
-            if type(el) == list:
-                for le in el:
-                    parse_list.append(le)
-            elif type(el) == dict:
-                parse_list.append(el)
-        scrape = text_scraper(text, url=url)
+    if url:
+        try:
+            scrape = scrape_me(url_path=url, wild_mode=True)
+        except(NoSchemaFoundInWildMode):
+            pass
+    if not scrape:
+        try:
+            parse_list.append(remove_graph(json.loads(text)))
+            if not url and 'url' in parse_list[0]:
+                url = parse_list[0]['url']
+            scrape = text_scraper("<script type='application/ld+json'>" + text + "</script>", url=url)
+
+        except JSONDecodeError:
+            soup = BeautifulSoup(text, "html.parser")
+            html_data = get_from_html(soup)
+            images += get_images_from_source(soup, url)
+            for el in soup.find_all('script', type='application/ld+json'):
+                el = remove_graph(el)
+                if not url and 'url' in el:
+                    url = el['url']
+                if type(el) == list:
+                    for le in el:
+                        parse_list.append(le)
+                elif type(el) == dict:
+                    parse_list.append(el)
+            for el in soup.find_all(type='application/json'):
+                el = remove_graph(el)
+                if type(el) == list:
+                    for le in el:
+                        parse_list.append(le)
+                elif type(el) == dict:
+                    parse_list.append(el)
+            scrape = text_scraper(text, url=url)
 
     recipe_json = helper.get_from_scraper(scrape, request)
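
This hunk inverts the import strategy: when a URL is available, get_recipe_from_source now tries a live wild-mode scrape first and only falls back to parsing the submitted text. A minimal sketch of that control flow, assuming the recipe_scrapers API exactly as called in the diff (wild mode raises NoSchemaFoundInWildMode when a page carries no usable schema.org data); the ld+json wrapping mirrors the fallback line above, and the JSONDecodeError/HTML branch is omitted for brevity:

    from recipe_scrapers import scrape_me
    from recipe_scrapers._exceptions import NoSchemaFoundInWildMode

    from cookbook.helper.scrapers.scrapers import text_scraper

    def scrape_with_fallback(url, raw_text):
        # Prefer a live fetch: wild mode attempts schema.org extraction on any host.
        if url:
            try:
                return scrape_me(url_path=url, wild_mode=True)
            except NoSchemaFoundInWildMode:
                pass  # no schema on the live page; fall through to the pasted text
        # Fallback: wrap the pasted text as ld+json and hand it to the text scraper.
        return text_scraper("<script type='application/ld+json'>" + raw_text + "</script>", url=url)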
@@ -114,7 +114,14 @@ def get_from_scraper(scrape, request):
         except Exception:
             pass
 
-    if source_url := scrape.url:
+    try:
+        source_url = scrape.canonical_url()
+    except Exception:
+        try:
+            source_url = scrape.url
+        except Exception:
+            pass
+    if source_url:
         recipe_json['source_url'] = source_url
         try:
             keywords.append(source_url.replace('http://', '').replace('https://', '').split('/')[0])
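
get_from_scraper now resolves the recipe's source URL defensively: prefer scrape.canonical_url(), fall back to scrape.url, and skip the field entirely if neither is available. The same resolution order in isolation (initialized to None here so the final check cannot hit an unbound name if both lookups raise):

    def resolve_source_url(scrape):
        source_url = None
        try:
            source_url = scrape.canonical_url()  # from <link rel="canonical">, may raise
        except Exception:
            try:
                source_url = scrape.url  # the URL the page was fetched from
            except Exception:
                pass  # a scraper built from pasted text may expose neither
        return source_url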
@@ -129,9 +136,11 @@ def get_from_scraper(scrape, request):
     ingredient_parser = IngredientParser(request, True)
 
     recipe_json['steps'] = []
-
-    for i in parse_instructions(scrape.instructions()):
-        recipe_json['steps'].append({'instruction': i, 'ingredients': [], })
+    try:
+        for i in parse_instructions(scrape.instructions()):
+            recipe_json['steps'].append({'instruction': i, 'ingredients': [], })
+    except Exception:
+        pass
+    if len(recipe_json['steps']) == 0:
+        recipe_json['steps'].append({'instruction': '', 'ingredients': [], })
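
Step extraction is hardened the same way: a scraper exception no longer aborts the whole import, and every recipe ends up with at least one step so the editor always has something to render. A self-contained sketch of the pattern (the instruction parser is passed in, standing for the project's parse_instructions helper):

    def build_steps(scrape, parse_instructions):
        steps = []
        try:
            for instruction in parse_instructions(scrape.instructions()):
                steps.append({'instruction': instruction, 'ingredients': []})
        except Exception:
            pass  # missing or malformed instructions: fall through to the default
        if len(steps) == 0:
            steps.append({'instruction': '', 'ingredients': []})  # guaranteed placeholder step
        return steps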
@@ -1,6 +1,6 @@
 from bs4 import BeautifulSoup
 from json import JSONDecodeError
-from recipe_scrapers import SCRAPERS, get_host_name
+from recipe_scrapers import SCRAPERS
 from recipe_scrapers._factory import SchemaScraperFactory
 from recipe_scrapers._schemaorg import SchemaOrg
@@ -15,13 +15,7 @@ SCRAPERS.update(CUSTOM_SCRAPERS)
 
 
 def text_scraper(text, url=None):
-    domain = None
-    if url:
-        domain = get_host_name(url)
-    if domain in SCRAPERS:
-        scraper_class = SCRAPERS[domain]
-    else:
-        scraper_class = SchemaScraperFactory.SchemaScraper
+    scraper_class = SchemaScraperFactory.SchemaScraper
 
     class TextScraper(scraper_class):
         def __init__(
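
Together with the dropped get_host_name import in the previous hunk, text_scraper no longer dispatches to a host-specific scraper class: pasted or pre-downloaded text always goes through the generic schema.org scraper, since host-specific classes expect to fetch pages themselves. A simplified sketch of the TextScraper idea under that assumption (hypothetical class name; the real class in this file takes more parameters):

    from bs4 import BeautifulSoup
    from json import JSONDecodeError
    from recipe_scrapers._factory import SchemaScraperFactory
    from recipe_scrapers._schemaorg import SchemaOrg

    class PreFetchedScraper(SchemaScraperFactory.SchemaScraper):
        # Parses HTML that was already downloaded or pasted instead of fetching it.
        def __init__(self, page_data, url=None):
            self.wild_mode = False
            self.soup = BeautifulSoup(page_data, "html.parser")
            self.url = url
            try:
                self.schema = SchemaOrg(page_data)
            except (JSONDecodeError, AttributeError):
                pass  # no schema.org payload; scraper helpers fall back to the soup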
@@ -27,7 +27,7 @@ class Paprika(Integration):
         recipe.description = '' if len(recipe_json['description'].strip()) > 500 else recipe_json['description'].strip()
 
         try:
-            if 'servings' in recipe_json['servings']:
+            if 'servings' in recipe_json:
                 recipe.servings = parse_servings(recipe_json['servings'])
                 recipe.servings_text = parse_servings_text(recipe_json['servings'])
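
The Paprika fix corrects a membership test: 'servings' in recipe_json['servings'] checks for the substring inside the value (and raises KeyError when the key is absent), while the intended test is for the key in the dict. A quick illustration:

    recipe_json = {'servings': '4'}
    'servings' in recipe_json['servings']  # False: substring test against the value '4'
    'servings' in recipe_json              # True: the intended key test

    recipe_json = {}
    'servings' in recipe_json              # False, and no KeyError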
@@ -78,7 +78,11 @@ class Plantoeat(Integration):
         current_recipe = ''
 
         for fl in file.readlines():
-            line = fl.decode("windows-1250")
+            try:
+                line = fl.decode("utf-8")
+            except UnicodeDecodeError:
+                line = fl.decode("windows-1250")
 
             if line.startswith('--------------'):
                 if current_recipe != '':
                     recipe_list.append(current_recipe)
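
Plan to Eat files are now decoded as UTF-8 first, keeping windows-1250 only as a per-line fallback for legacy exports. Trying the strict codec first is safe because bytes that are not valid UTF-8 raise immediately. The pattern in isolation:

    def decode_line(raw: bytes) -> str:
        try:
            return raw.decode("utf-8")
        except UnicodeDecodeError:
            return raw.decode("windows-1250")  # legacy single-byte exports

    decode_line("Smaženice".encode("utf-8"))         # normal UTF-8 path
    decode_line("Smaženice".encode("windows-1250"))  # 0x9E is invalid UTF-8; fallback path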
BIN  cookbook/locale/bg/LC_MESSAGES/django.mo  (new file; binary file not shown)
BIN  cookbook/locale/da/LC_MESSAGES/django.mo  (new file; binary file not shown)
(additional binary files changed; names not shown)