diff --git a/cookbook/helper/recipe_html_import.py b/cookbook/helper/recipe_html_import.py index 427d07b95..1ebe1d27b 100644 --- a/cookbook/helper/recipe_html_import.py +++ b/cookbook/helper/recipe_html_import.py @@ -7,10 +7,7 @@ from bs4.element import Tag from cookbook.helper import recipe_url_import as helper -# %% - -# %% -def get_from_raw(text, space): +def get_recipe_from_source(text, space): def build_node(k, v): if isinstance(v, dict): node = { @@ -113,17 +110,20 @@ def get_from_raw(text, space): if '@graph' in el: for x in el['@graph']: if '@type' in x and x['@type'] == 'Recipe': - recipe_json = helper.find_recipe_json(x, None, space) - recipe_tree += [{'name': 'ld+json', 'children': temp_tree}] + el = x + + if '@type' in el and el['@type'] == 'Recipe': + recipe_json = helper.find_recipe_json(el, None, space) + recipe_tree += [{'name': 'ld+json', 'children': temp_tree}] else: recipe_tree += [{'name': 'json', 'children': temp_tree}] temp_tree = [] - # overide keyword structure from dict to list - kws = [] - for kw in recipe_json['keywords']: - kws.append(kw['text']) - recipe_json['keywords'] = kws - return recipe_json, recipe_tree + + +def get_from_html(text, space): + for s in soup.strings: + if ((s.parent.name not in INVISIBLE_ELEMS) and (len(s.strip()) > 0)): + print(s.parent.name, s, len(s)) \ No newline at end of file diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py index 9ab62e44a..cfe3c13be 100644 --- a/cookbook/helper/recipe_url_import.py +++ b/cookbook/helper/recipe_url_import.py @@ -14,8 +14,8 @@ from recipe_scrapers._schemaorg import SchemaOrgException from recipe_scrapers._utils import get_minutes -def get_from_scraper(scrape, space): - # converting the scrape_me object to the existing json format based on ld+json +def get_from_html_old(html_text, url, space): + soup = BeautifulSoup(html_text, "html.parser") # first try finding ld+json as its most common for ld in soup.find_all('script', type='application/ld+json'): diff --git a/cookbook/templates/url_import.html b/cookbook/templates/url_import.html index a4d747f62..460a38151 100644 --- a/cookbook/templates/url_import.html +++ b/cookbook/templates/url_import.html @@ -25,72 +25,54 @@

{% trans 'Import' %}