diff --git a/cookbook/helper/ingredient_parser.py b/cookbook/helper/ingredient_parser.py index b773c2f4c..3ce5170ba 100644 --- a/cookbook/helper/ingredient_parser.py +++ b/cookbook/helper/ingredient_parser.py @@ -1,3 +1,4 @@ +import re import string import unicodedata @@ -25,17 +26,12 @@ def parse_amount(x): did_check_frac = False end = 0 - while ( - end < len(x) - and ( - x[end] in string.digits - or ( - (x[end] == '.' or x[end] == ',' or x[end] == '/') - and end + 1 < len(x) - and x[end + 1] in string.digits - ) - ) - ): + while (end < len(x) and (x[end] in string.digits + or ( + (x[end] == '.' or x[end] == ',' or x[end] == '/') + and end + 1 < len(x) + and x[end + 1] in string.digits + ))): end += 1 if end > 0: if "/" in x[:end]: @@ -55,6 +51,9 @@ def parse_amount(x): unit = x[end + 1:] except ValueError: unit = x[end:] + + if unit.startswith('('): # i dont know any unit that starts with ( so its likely an alternative like 1L (500ml) Water + unit = '' return amount, unit @@ -107,6 +106,12 @@ def parse(x): ingredient = '' note = '' + # if the string contains parenthesis early on remove it and place it at the end + # because its likely some kind of note + if re.match('(.){1,6}\s\((.[^\(\)])+\)\s', x): + match = re.search('\((.[^\(])+\)', x) + x = x[:match.start()] + x[match.end():] + ' ' + x[match.start():match.end()] + tokens = x.split() if len(tokens) == 1: # there only is one argument, that must be the ingredient @@ -115,16 +120,17 @@ def parse(x): try: # try to parse first argument as amount amount, unit = parse_amount(tokens[0]) + print('test', unit) # only try to parse second argument as amount if there are at least # three arguments if it already has a unit there can't be # a fraction for the amount if len(tokens) > 2: try: if not unit == '': - # a unit is already found, no need to try the second argument for a fraction # noqa: E501 + # a unit is already found, no need to try the second argument for a fraction # probably not the best method to do it, but I didn't want to make an if check and paste the exact same thing in the else as already is in the except # noqa: E501 raise ValueError - # try to parse second argument as amount and add that, in case of '2 1/2' or '2 ½' # noqa: E501 + # try to parse second argument as amount and add that, in case of '2 1/2' or '2 ½' amount += parse_fraction(tokens[1]) # assume that units can't end with a comma if len(tokens) > 3 and not tokens[2].endswith(','): @@ -142,7 +148,10 @@ def parse(x): # try to use second argument as unit and everything else as ingredient, use everything as ingredient if it fails # noqa: E501 try: ingredient, note = parse_ingredient(tokens[2:]) - unit = tokens[1] + if unit == '': + unit = tokens[1] + else: + note = tokens[1] except ValueError: ingredient, note = parse_ingredient(tokens[1:]) else: diff --git a/cookbook/tests/other/test_ingredient_parser.py b/cookbook/tests/other/test_ingredient_parser.py index 076df4b04..f20706166 100644 --- a/cookbook/tests/other/test_ingredient_parser.py +++ b/cookbook/tests/other/test_ingredient_parser.py @@ -53,7 +53,10 @@ def test_ingredient_parser(): "50 g smör eller margarin": (50, "g", "smör eller margarin", ""), "3,5 l Wasser": (3.5, "l", "Wasser", ""), "3.5 l Wasser": (3.5, "l", "Wasser", ""), - "400 g Karotte(n)": (400, "g", "Karotte(n)", "") + "400 g Karotte(n)": (400, "g", "Karotte(n)", ""), + "400g unsalted butter": (400, "g", "butter", "unsalted"), + "2L Wasser": (2, "L", "Wasser", ""), + "1 (16 ounce) package dry lentils, rinsed": (1, "package", "dry lentils, rinsed", "16 ounce"), } # for German you could say that if an ingredient does not have # an amount # and it starts with a lowercase letter, then that