Bug fix: URL import

This commit is contained in:
smilerz
2021-04-05 21:41:00 -05:00
parent c50bd039ef
commit 4bc4ce0d7c
6 changed files with 63 additions and 117 deletions

View File

@@ -82,7 +82,12 @@ def get_recipe_from_source(text, url, space):
html_data = get_from_html(soup)
images += get_images_from_source(soup, url)
for el in soup.find_all('script', type='application/ld+json'):
parse_list.append(remove_graph(el))
el = remove_graph(el)
if type(el) == list:
for l in el:
parse_list.append(l)
else:
parse_list.append(el)
for el in soup.find_all(type='application/json'):
parse_list.append(remove_graph(el))

View File

@@ -39,8 +39,9 @@ def get_from_scraper(scrape, space):
pass
try:
recipe_json['image'] = scrape.image()
except AttributeError:
recipe_json['image'] = parse_image(scrape.image())
except (AttributeError, TypeError):
recipe_json['image'] = ''
pass
keywords = []
@@ -283,7 +284,8 @@ def parse_keywords(keyword_json, space):
for kw in keyword_json:
kw = normalize_string(kw)
if k := Keyword.objects.filter(name=kw, space=space).first():
keywords.append({'id': str(k.id), 'text': str(k)})
if len (k['text']) > 0:
keywords.append({'id': str(k.id), 'text': str(k)})
else:
keywords.append({'id': random.randrange(1111111, 9999999, 1), 'text': kw})

View File

@@ -57,7 +57,6 @@ class CooksIllustrated(AbstractScraper):
raise NotImplementedError("This should be implemented.")
def get_recipe(self):
# TODO add missing data to schema.data
j = json.loads(self.soup.find(type='application/json').string)
name = list(j['props']['initialState']['content']['documents'])[0]
self.recipe = j['props']['initialState']['content']['documents'][name]