mirror of
https://github.com/TandoorRecipes/recipes.git
synced 2026-01-03 05:11:31 -05:00
Merge pull request #1984 from andyjayne/fix-nl-ingredients
fix: ingredient parsing for non-latin languages
This commit is contained in:
@@ -221,8 +221,8 @@ class IngredientParser:
|
||||
|
||||
# some people/languages put amount and unit at the end of the ingredient string
|
||||
# if something like this is detected move it to the beginning so the parser can handle it
|
||||
if len(ingredient) < 1000 and re.search(r'^([A-z])+(.)*[1-9](\d)*\s([A-z])+', ingredient):
|
||||
match = re.search(r'[1-9](\d)*\s([A-z])+', ingredient)
|
||||
if len(ingredient) < 1000 and re.search(r'^([^\W\d_])+(.)*[1-9](\d)*\s*([^\W\d_])+', ingredient):
|
||||
match = re.search(r'[1-9](\d)*\s*([^\W\d_])+', ingredient)
|
||||
print(f'reording from {ingredient} to {ingredient[match.start():match.end()] + " " + ingredient.replace(ingredient[match.start():match.end()], "")}')
|
||||
ingredient = ingredient[match.start():match.end()] + ' ' + ingredient.replace(ingredient[match.start():match.end()], '')
|
||||
|
||||
|
||||
@@ -66,7 +66,9 @@ def test_ingredient_parser():
|
||||
1.0, 'Lorem', 'ipsum', 'dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l Lorem ipsum dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l'),
|
||||
"1 LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl": (
|
||||
1.0, None, 'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingeli',
|
||||
'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl')
|
||||
'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl'),
|
||||
"砂糖 50g": (50, "g", "砂糖", ""),
|
||||
"卵 4個": (4, "個", "卵", "")
|
||||
|
||||
}
|
||||
# for German you could say that if an ingredient does not have
|
||||
|
||||
Reference in New Issue
Block a user