mirror of
https://github.com/TandoorRecipes/recipes.git
synced 2026-01-07 07:08:03 -05:00
Merge pull request #1984 from andyjayne/fix-nl-ingredients
fix: ingredient parsing for non-latin languages
This commit is contained in:
@@ -221,8 +221,8 @@ class IngredientParser:
|
|||||||
|
|
||||||
# some people/languages put amount and unit at the end of the ingredient string
|
# some people/languages put amount and unit at the end of the ingredient string
|
||||||
# if something like this is detected move it to the beginning so the parser can handle it
|
# if something like this is detected move it to the beginning so the parser can handle it
|
||||||
if len(ingredient) < 1000 and re.search(r'^([A-z])+(.)*[1-9](\d)*\s([A-z])+', ingredient):
|
if len(ingredient) < 1000 and re.search(r'^([^\W\d_])+(.)*[1-9](\d)*\s*([^\W\d_])+', ingredient):
|
||||||
match = re.search(r'[1-9](\d)*\s([A-z])+', ingredient)
|
match = re.search(r'[1-9](\d)*\s*([^\W\d_])+', ingredient)
|
||||||
print(f'reording from {ingredient} to {ingredient[match.start():match.end()] + " " + ingredient.replace(ingredient[match.start():match.end()], "")}')
|
print(f'reording from {ingredient} to {ingredient[match.start():match.end()] + " " + ingredient.replace(ingredient[match.start():match.end()], "")}')
|
||||||
ingredient = ingredient[match.start():match.end()] + ' ' + ingredient.replace(ingredient[match.start():match.end()], '')
|
ingredient = ingredient[match.start():match.end()] + ' ' + ingredient.replace(ingredient[match.start():match.end()], '')
|
||||||
|
|
||||||
|
|||||||
@@ -66,7 +66,9 @@ def test_ingredient_parser():
|
|||||||
1.0, 'Lorem', 'ipsum', 'dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l Lorem ipsum dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l'),
|
1.0, 'Lorem', 'ipsum', 'dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l Lorem ipsum dolor sit amet consetetur sadipscing elitr sed diam nonumy eirmod tempor invidunt ut l'),
|
||||||
"1 LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl": (
|
"1 LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl": (
|
||||||
1.0, None, 'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingeli',
|
1.0, None, 'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingeli',
|
||||||
'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl')
|
'LoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutlLoremipsumdolorsitametconsetetursadipscingelitrseddiamnonumyeirmodtemporinviduntutl'),
|
||||||
|
"砂糖 50g": (50, "g", "砂糖", ""),
|
||||||
|
"卵 4個": (4, "個", "卵", "")
|
||||||
|
|
||||||
}
|
}
|
||||||
# for German you could say that if an ingredient does not have
|
# for German you could say that if an ingredient does not have
|
||||||
|
|||||||
Reference in New Issue
Block a user