Merge pull request #1984 from andyjayne/fix-nl-ingredients

fix: ingredient parsing for non-latin languages
This commit is contained in:
vabene1111
2022-09-09 18:07:55 +02:00
committed by GitHub
2 changed files with 5 additions and 3 deletions

View File

@@ -221,8 +221,8 @@ class IngredientParser:
# some people/languages put amount and unit at the end of the ingredient string
# if something like this is detected move it to the beginning so the parser can handle it
if len(ingredient) < 1000 and re.search(r'^([A-z])+(.)*[1-9](\d)*\s([A-z])+', ingredient):
match = re.search(r'[1-9](\d)*\s([A-z])+', ingredient)
if len(ingredient) < 1000 and re.search(r'^([^\W\d_])+(.)*[1-9](\d)*\s*([^\W\d_])+', ingredient):
match = re.search(r'[1-9](\d)*\s*([^\W\d_])+', ingredient)
print(f'reording from {ingredient} to {ingredient[match.start():match.end()] + " " + ingredient.replace(ingredient[match.start():match.end()], "")}')
ingredient = ingredient[match.start():match.end()] + ' ' + ingredient.replace(ingredient[match.start():match.end()], '')

View File

@@ -66,7 +66,9 @@ def test_ingredient_parser():
1.0, 'Lorem', 'ipsum', 'dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l Lorem ipsum dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l'),
"1 LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl": (
1.0, None, 'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingeli',
'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl')
'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl'),
"砂糖 50g": (50, "g", "砂糖", ""),
"卵 4個": (4, "", "", "")
}
# for German you could say that if an ingredient does not have