From 012a1a79159488ed1923b31ebcbe4af129010008 Mon Sep 17 00:00:00 2001 From: vabene1111 Date: Sat, 23 Apr 2022 13:03:15 +0200 Subject: [PATCH] ingredient parser produces expected results again --- cookbook/helper/ingredient_parser.py | 95 ++++++++++--------- cookbook/templates/test.html | 18 +--- .../tests/other/test_ingredient_parser.py | 4 +- cookbook/views/views.py | 11 ++- 4 files changed, 62 insertions(+), 66 deletions(-) diff --git a/cookbook/helper/ingredient_parser.py b/cookbook/helper/ingredient_parser.py index ee4b8d916..b310443ef 100644 --- a/cookbook/helper/ingredient_parser.py +++ b/cookbook/helper/ingredient_parser.py @@ -46,7 +46,7 @@ class IngredientParser: def apply_food_automation(self, food): """ - Apply food alias automations to passed foood + Apply food alias automations to passed food :param food: unit as string :return: food as string (possibly changed by automation) """ @@ -155,33 +155,36 @@ class IngredientParser: except ValueError: unit = x[end:] + if unit is not None and unit.strip() == '': + unit = None + if unit is not None and (unit.startswith('(') or unit.startswith('-')): # i dont know any unit that starts with ( or - so its likely an alternative like 1L (500ml) Water or 2-3 - unit = '' + unit = None note = x return amount, unit, note - def parse_ingredient_with_comma(self, tokens): - ingredient = '' + def parse_food_with_comma(self, tokens): + food = '' note = '' start = 0 # search for first occurrence of an argument ending in a comma while start < len(tokens) and not tokens[start].endswith(','): start += 1 if start == len(tokens): - # no token ending in a comma found -> use everything as ingredient - ingredient = ' '.join(tokens) + # no token ending in a comma found -> use everything as food + food = ' '.join(tokens) else: - ingredient = ' '.join(tokens[:start + 1])[:-1] + food = ' '.join(tokens[:start + 1])[:-1] note = ' '.join(tokens[start + 1:]) - return ingredient, note + return food, note - def parse_ingredient(self, tokens): - ingredient = '' + def parse_food(self, tokens): + food = '' note = '' if tokens[-1].endswith(')'): # Check if the matching opening bracket is in the same token if (not tokens[-1].startswith('(')) and ('(' in tokens[-1]): - return self.parse_ingredient_with_comma(tokens) + return self.parse_food_with_comma(tokens) # last argument ends with closing bracket -> look for opening bracket start = len(tokens) - 1 while not tokens[start].startswith('(') and not start == 0: @@ -191,36 +194,41 @@ class IngredientParser: raise ValueError elif start < 0: # no opening bracket anywhere -> just ignore the last bracket - ingredient, note = self.parse_ingredient_with_comma(tokens) + food, note = self.parse_food_with_comma(tokens) else: - # opening bracket found -> split in ingredient and note, remove brackets from note # noqa: E501 + # opening bracket found -> split in food and note, remove brackets from note # noqa: E501 note = ' '.join(tokens[start:])[1:-1] - ingredient = ' '.join(tokens[:start]) + food = ' '.join(tokens[:start]) else: - ingredient, note = self.parse_ingredient_with_comma(tokens) - return ingredient, note + food, note = self.parse_food_with_comma(tokens) + return food, note - def parse(self, x): + def parse(self, ingredient): + """ + Main parsing function, takes an ingredient string (e.g. '1 l Water') and extracts amount, unit, food, ... + :param ingredient: string ingredient + :return: amount, unit (can be None), food, note (can be empty) + """ # initialize default values amount = 0 unit = None - ingredient = '' + food = '' note = '' unit_note = '' - if len(x) == 0: + if len(ingredient) == 0: raise ValueError('string to parse cannot be empty') # if the string contains parenthesis early on remove it and place it at the end # because its likely some kind of note - if re.match('(.){1,6}\s\((.[^\(\)])+\)\s', x): - match = re.search('\((.[^\(])+\)', x) - x = x[:match.start()] + x[match.end():] + ' ' + x[match.start():match.end()] + if re.match('(.){1,6}\s\((.[^\(\)])+\)\s', ingredient): + match = re.search('\((.[^\(])+\)', ingredient) + ingredient = ingredient[:match.start()] + ingredient[match.end():] + ' ' + ingredient[match.start():match.end()] - tokens = x.split() + tokens = ingredient.split() # split at each space into tokens if len(tokens) == 1: - # there only is one argument, that must be the ingredient - ingredient = tokens[0] + # there only is one argument, that must be the food + food = tokens[0] else: try: # try to parse first argument as amount @@ -232,51 +240,50 @@ class IngredientParser: try: if unit is not None: # a unit is already found, no need to try the second argument for a fraction - # probably not the best method to do it, but I didn't want to make an if check and paste the exact same thing in the else as already is in the except # noqa: E501 + # probably not the best method to do it, but I didn't want to make an if check and paste the exact same thing in the else as already is in the except raise ValueError # try to parse second argument as amount and add that, in case of '2 1/2' or '2 ½' amount += self.parse_fraction(tokens[1]) # assume that units can't end with a comma if len(tokens) > 3 and not tokens[2].endswith(','): - # try to use third argument as unit and everything else as ingredient, use everything as ingredient if it fails # noqa: E501 + # try to use third argument as unit and everything else as food, use everything as food if it fails try: - ingredient, note = self.parse_ingredient(tokens[3:]) + food, note = self.parse_food(tokens[3:]) unit = tokens[2] except ValueError: - ingredient, note = self.parse_ingredient(tokens[2:]) + food, note = self.parse_food(tokens[2:]) else: - ingredient, note = self.parse_ingredient(tokens[2:]) + food, note = self.parse_food(tokens[2:]) except ValueError: # assume that units can't end with a comma if not tokens[1].endswith(','): - # try to use second argument as unit and everything else as ingredient, use everything as ingredient if it fails # noqa: E501 + # try to use second argument as unit and everything else as food, use everything as food if it fails try: - ingredient, note = self.parse_ingredient(tokens[2:]) + food, note = self.parse_food(tokens[2:]) if unit is None: unit = tokens[1] else: note = tokens[1] except ValueError: - ingredient, note = self.parse_ingredient(tokens[1:]) + food, note = self.parse_food(tokens[1:]) else: - ingredient, note = self.parse_ingredient(tokens[1:]) + food, note = self.parse_food(tokens[1:]) else: # only two arguments, first one is the amount - # which means this is the ingredient - ingredient = tokens[1] + # which means this is the food + food = tokens[1] except ValueError: try: # can't parse first argument as amount - # -> no unit -> parse everything as ingredient - ingredient, note = self.parse_ingredient(tokens) + # -> no unit -> parse everything as food + food, note = self.parse_food(tokens) except ValueError: - ingredient = ' '.join(tokens[1:]) + food = ' '.join(tokens[1:]) if unit_note not in note: note += ' ' + unit_note - try: - unit = self.apply_unit_automation(unit.strip()) - except Exception: - pass - return amount, unit, self.apply_food_automation(ingredient.strip()), note.strip() + if unit: + unit = self.apply_unit_automation(unit.strip()) + + return amount, unit, self.apply_food_automation(food.strip()), note.strip() diff --git a/cookbook/templates/test.html b/cookbook/templates/test.html index f0c69a800..f1c083de8 100644 --- a/cookbook/templates/test.html +++ b/cookbook/templates/test.html @@ -10,27 +10,11 @@ {% block content_fluid %} -
- - -
- + {{ data }} {% endblock %} {% block script %} - {% if debug %} - - {% else %} - - {% endif %} - - - {% render_bundle 'import_view' %} {% endblock %} \ No newline at end of file diff --git a/cookbook/tests/other/test_ingredient_parser.py b/cookbook/tests/other/test_ingredient_parser.py index d2d6b144a..64ea58e70 100644 --- a/cookbook/tests/other/test_ingredient_parser.py +++ b/cookbook/tests/other/test_ingredient_parser.py @@ -4,7 +4,7 @@ from cookbook.helper.ingredient_parser import IngredientParser def test_ingredient_parser(): expectations = { "2¼ l Wasser": (2.25, "l", "Wasser", ""), - "2¼l Wasser": (2.25, "l", "Wasser", ""), + "3¼l Wasser": (3.25, "l", "Wasser", ""), "¼ l Wasser": (0.25, "l", "Wasser", ""), "3l Wasser": (3, "l", "Wasser", ""), "4 l Wasser": (4, "l", "Wasser", ""), @@ -58,7 +58,7 @@ def test_ingredient_parser(): "2L Wasser": (2, "L", "Wasser", ""), "1 (16 ounce) package dry lentils, rinsed": (1, "package", "dry lentils, rinsed", "16 ounce"), "2-3 c Water": (2, "c", "Water", "2-3"), - "Pane (raffermo o secco) 80 g": (0, "", "Pane 80 g", "raffermo o secco"), # TODO this is actually not a good result but currently expected + "Pane (raffermo o secco) 80 g": (0, None, "Pane 80 g", "raffermo o secco"), # TODO this is actually not a good result but currently expected } # for German you could say that if an ingredient does not have # an amount # and it starts with a lowercase letter, then that diff --git a/cookbook/views/views.py b/cookbook/views/views.py index f47eed694..1234cd045 100644 --- a/cookbook/views/views.py +++ b/cookbook/views/views.py @@ -662,10 +662,15 @@ def test(request): if not settings.DEBUG: return HttpResponseRedirect(reverse('index')) - if (api_token := Token.objects.filter(user=request.user).first()) is None: - api_token = Token.objects.create(user=request.user) + from cookbook.helper.ingredient_parser import IngredientParser + parser = IngredientParser(request, False) - return render(request, 'test.html', {'api_token': api_token}) + data = { + 'original': 'Pane (raffermo o secco) 80 g' + } + data['parsed'] = parser.parse(data['original']) + + return render(request, 'test.html', {'data': data}) def test2(request):