diff --git a/cookbook/helper/recipe_url_import.py b/cookbook/helper/recipe_url_import.py index 49f6b05f0..4a9c7f0d7 100644 --- a/cookbook/helper/recipe_url_import.py +++ b/cookbook/helper/recipe_url_import.py @@ -1,18 +1,16 @@ import json import random import re -import unicodedata from json import JSONDecodeError import microdata from bs4 import BeautifulSoup +from cookbook.helper.ingredient_parser import parse as parse_ingredient +from cookbook.models import Keyword from django.http import JsonResponse from django.utils.dateparse import parse_duration from django.utils.translation import gettext as _ -from cookbook.models import Keyword -from cookbook.helper.ingredient_parser import parse as parse_ingredient - def get_from_html(html_text, url): soup = BeautifulSoup(html_text, "html.parser") @@ -31,10 +29,16 @@ def get_from_html(html_text, url): if '@type' in x and x['@type'] == 'Recipe': ld_json_item = x - if '@type' in ld_json_item and ld_json_item['@type'] == 'Recipe': + if ('@type' in ld_json_item + and ld_json_item['@type'] == 'Recipe'): return find_recipe_json(ld_json_item, url) - except JSONDecodeError as e: - return JsonResponse({'error': True, 'msg': _('The requested site provided malformed data and cannot be read.')}, status=400) + except JSONDecodeError: + return JsonResponse( + { + 'error': True, + 'msg': _('The requested site provided malformed data and cannot be read.') # noqa: E501 + }, + status=400) # now try to find microdata items = microdata.get_items(html_text) @@ -43,14 +47,19 @@ def get_from_html(html_text, url): if 'schema.org/Recipe' in str(md_json['type']): return find_recipe_json(md_json['properties'], url) - return JsonResponse({'error': True, 'msg': _('The requested site does not provide any recognized data format to import the recipe from.')}, status=400) + return JsonResponse( + { + 'error': True, + 'msg': _('The requested site does not provide any recognized data format to import the recipe from.') # noqa: E501 + }, + status=400) def find_recipe_json(ld_json, url): if type(ld_json['name']) == list: try: ld_json['name'] = ld_json['name'][0] - except: + except Exception: ld_json['name'] = 'ERROR' # some sites use ingredients instead of recipeIngredients @@ -59,8 +68,9 @@ def find_recipe_json(ld_json, url): if 'recipeIngredient' in ld_json: # some pages have comma separated ingredients in a single array entry - if len(ld_json['recipeIngredient']) == 1 and len(ld_json['recipeIngredient'][0]) > 30: - ld_json['recipeIngredient'] = ld_json['recipeIngredient'][0].split(',') + if (len(ld_json['recipeIngredient']) == 1 + and len(ld_json['recipeIngredient'][0]) > 30): + ld_json['recipeIngredient'] = ld_json['recipeIngredient'][0].split(',') # noqa: E501 for x in ld_json['recipeIngredient']: if '\n' in x: @@ -71,13 +81,41 @@ def find_recipe_json(ld_json, url): ingredients = [] for x in ld_json['recipeIngredient']: - if x.replace(' ','') != '': + if x.replace(' ', '') != '': try: amount, unit, ingredient, note = parse_ingredient(x) if ingredient: - ingredients.append({'amount': amount, 'unit': {'text': unit, 'id': random.randrange(10000, 99999)}, 'ingredient': {'text': ingredient, 'id': random.randrange(10000, 99999)}, "note": note, 'original': x}) - except: - ingredients.append({'amount': 0, 'unit': {'text': "", 'id': random.randrange(10000, 99999)}, 'ingredient': {'text': x, 'id': random.randrange(10000, 99999)}, "note": "", 'original': x}) + ingredients.append( + { + 'amount': amount, + 'unit': { + 'text': unit, + 'id': random.randrange(10000, 99999) + }, + 'ingredient': { + 'text': ingredient, + 'id': random.randrange(10000, 99999) + }, + 'note': note, + 'original': x + } + ) + except Exception: + ingredients.append( + { + 'amount': 0, + 'unit': { + 'text': '', + 'id': random.randrange(10000, 99999) + }, + 'ingredient': { + 'text': x, + 'id': random.randrange(10000, 99999) + }, + 'note': '', + 'original': x + } + ) ld_json['recipeIngredient'] = ingredients else: @@ -91,7 +129,9 @@ def find_recipe_json(ld_json, url): ld_json['keywords'] = ld_json['keywords'].split(',') # keywords as string in list - if type(ld_json['keywords']) == list and len(ld_json['keywords']) == 1 and ',' in ld_json['keywords'][0]: + if (type(ld_json['keywords']) == list + and len(ld_json['keywords']) == 1 + and ',' in ld_json['keywords'][0]): ld_json['keywords'] = ld_json['keywords'][0].split(',') # keywords as list @@ -126,10 +166,10 @@ def find_recipe_json(ld_json, url): instructions += str(i) ld_json['recipeInstructions'] = instructions - ld_json['recipeInstructions'] = re.sub(r'\n\s*\n', '\n\n', ld_json['recipeInstructions']) - ld_json['recipeInstructions'] = re.sub(' +', ' ', ld_json['recipeInstructions']) - ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('
', '') - ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('
', '') + ld_json['recipeInstructions'] = re.sub(r'\n\s*\n', '\n\n', ld_json['recipeInstructions']) # noqa: E501 + ld_json['recipeInstructions'] = re.sub(' +', ' ', ld_json['recipeInstructions']) # noqa: E501 + ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('', '') # noqa: E501 + ld_json['recipeInstructions'] = ld_json['recipeInstructions'].replace('
', '') # noqa: E501 else: ld_json['recipeInstructions'] = '' @@ -149,9 +189,14 @@ def find_recipe_json(ld_json, url): if 'cookTime' in ld_json: try: - if type(ld_json['cookTime']) == list and len(ld_json['cookTime']) > 0: + if (type(ld_json['cookTime']) == list + and len(ld_json['cookTime']) > 0): ld_json['cookTime'] = ld_json['cookTime'][0] - ld_json['cookTime'] = round(parse_duration(ld_json['cookTime']).seconds / 60) + ld_json['cookTime'] = round( + parse_duration( + ld_json['cookTime'] + ).seconds / 60 + ) except TypeError: ld_json['cookTime'] = 0 else: @@ -159,16 +204,24 @@ def find_recipe_json(ld_json, url): if 'prepTime' in ld_json: try: - if type(ld_json['prepTime']) == list and len(ld_json['prepTime']) > 0: + if (type(ld_json['prepTime']) == list + and len(ld_json['prepTime']) > 0): ld_json['prepTime'] = ld_json['prepTime'][0] - ld_json['prepTime'] = round(parse_duration(ld_json['prepTime']).seconds / 60) + ld_json['prepTime'] = round( + parse_duration( + ld_json['prepTime'] + ).seconds / 60 + ) except TypeError: ld_json['prepTime'] = 0 else: ld_json['prepTime'] = 0 for key in list(ld_json): - if key not in ['prepTime', 'cookTime', 'image', 'recipeInstructions', 'keywords', 'name', 'recipeIngredient']: + if key not in [ + 'prepTime', 'cookTime', 'image', 'recipeInstructions', + 'keywords', 'name', 'recipeIngredient' + ]: ld_json.pop(key, None) return JsonResponse(ld_json)