Merge pull request #1984 from andyjayne/fix-nl-ingredients

fix: ingredient parsing for non-latin languages
This commit is contained in:
vabene1111
2022-09-09 18:07:55 +02:00
committed by GitHub
2 changed files with 5 additions and 3 deletions

View File

@@ -221,8 +221,8 @@ class IngredientParser:
# some people/languages put amount and unit at the end of the ingredient string # some people/languages put amount and unit at the end of the ingredient string
# if something like this is detected move it to the beginning so the parser can handle it # if something like this is detected move it to the beginning so the parser can handle it
if len(ingredient) < 1000 and re.search(r'^([A-z])+(.)*[1-9](\d)*\s([A-z])+', ingredient): if len(ingredient) < 1000 and re.search(r'^([^\W\d_])+(.)*[1-9](\d)*\s*([^\W\d_])+', ingredient):
match = re.search(r'[1-9](\d)*\s([A-z])+', ingredient) match = re.search(r'[1-9](\d)*\s*([^\W\d_])+', ingredient)
print(f'reording from {ingredient} to {ingredient[match.start():match.end()] + " " + ingredient.replace(ingredient[match.start():match.end()], "")}') print(f'reording from {ingredient} to {ingredient[match.start():match.end()] + " " + ingredient.replace(ingredient[match.start():match.end()], "")}')
ingredient = ingredient[match.start():match.end()] + ' ' + ingredient.replace(ingredient[match.start():match.end()], '') ingredient = ingredient[match.start():match.end()] + ' ' + ingredient.replace(ingredient[match.start():match.end()], '')

View File

@@ -66,7 +66,9 @@ def test_ingredient_parser():
1.0, 'Lorem', 'ipsum', 'dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l Lorem ipsum dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l'), 1.0, 'Lorem', 'ipsum', 'dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l Lorem ipsum dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l'),
"1 LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl": ( "1 LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl": (
1.0, None, 'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingeli', 1.0, None, 'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingeli',
'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl') 'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl'),
"砂糖 50g": (50, "g", "砂糖", ""),
"卵 4個": (4, "", "", "")
} }
# for German you could say that if an ingredient does not have # for German you could say that if an ingredient does not have