added support for unit/amount at end of ingredient

This commit is contained in:
vabene1111
2022-04-23 13:53:04 +02:00
parent 012a1a7915
commit e0b7d1a8f0
3 changed files with 10 additions and 2 deletions

View File

@@ -219,6 +219,13 @@ class IngredientParser:
if len(ingredient) == 0:
raise ValueError('string to parse cannot be empty')
# some people/languages put amount and unit at the end of the ingredient string
# if something like this is detected move it to the beginning so the parser can handle it
if re.search(r'^([A-z])+(.)*[1-9](\d)*\s([A-z])+', ingredient):
match = re.search(r'[1-9](\d)*\s([A-z])+', ingredient)
print(f'reording from {ingredient} to {ingredient[match.start():match.end()] + " " + ingredient.replace(ingredient[match.start():match.end()], "")}')
ingredient = ingredient[match.start():match.end()] + ' ' + ingredient.replace(ingredient[match.start():match.end()], '')
# if the string contains parentheses early on, remove them and place them at the end
# because it's likely some kind of note
if re.match('(.){1,6}\s\((.[^\(\)])+\)\s', ingredient):

View File

@@ -58,7 +58,7 @@ def test_ingredient_parser():
"2L Wasser": (2, "L", "Wasser", ""),
"1 (16 ounce) package dry lentils, rinsed": (1, "package", "dry lentils, rinsed", "16 ounce"),
"2-3 c Water": (2, "c", "Water", "2-3"),
"Pane (raffermo o secco) 80 g": (0, None, "Pane 80 g", "raffermo o secco"), # TODO this is actually not a good result but currently expected
"Pane (raffermo o secco) 80 g": (80, "g", "Pane", "raffermo o secco"),
}
# for German you could say that if an ingredient does not have
# an amount and it starts with a lowercase letter, then that
@@ -70,4 +70,5 @@ def test_ingredient_parser():
for key, val in expectations.items():
count += 1
parsed = ingredient_parser.parse(key)
print(f'testing if {key} becomes {val}')
assert parsed == val

View File

@@ -666,7 +666,7 @@ def test(request):
parser = IngredientParser(request, False)
data = {
'original': 'Pane (raffermo o secco) 80 g'
'original': 'Creme Frainche'
}
data['parsed'] = parser.parse(data['original'])