basic url importer working

This commit is contained in:
vabene1111
2024-12-08 21:54:14 +01:00
parent da567a9d6c
commit e3f20459dd
206 changed files with 1217 additions and 18329 deletions

View File

@@ -424,9 +424,9 @@ def parse_keywords(keyword_json, request):
if len(kw) != 0:
kw = automation_engine.apply_keyword_automation(kw)
if k := Keyword.objects.filter(name__iexact=kw, space=request.space).first():
keywords.append({'label': str(k), 'name': k.name, 'id': k.id})
keywords.append({'label': str(k), 'name': k.name, 'id': k.id, 'import_keyword': True})
else:
keywords.append({'label': kw, 'name': kw})
keywords.append({'label': kw, 'name': kw, 'import_keyword': False})
return keywords

View File

@@ -1559,6 +1559,60 @@ class RecipeFromSourceSerializer(serializers.Serializer):
data = serializers.CharField(required=False, allow_null=True, allow_blank=True)
bookmarklet = serializers.IntegerField(required=False, allow_null=True, )
# Food entry of an imported ingredient; only the food's name is carried.
class SourceImportFoodSerializer(serializers.Serializer):
    name = serializers.CharField()
# Unit entry of an imported ingredient; only the unit's name is carried.
class SourceImportUnitSerializer(serializers.Serializer):
    name = serializers.CharField()
# One ingredient line of an imported step: an amount plus nested food and
# unit objects, an optional note and the ingredient's original text.
class SourceImportIngredientSerializer(serializers.Serializer):
    amount = serializers.FloatField()
    food = SourceImportFoodSerializer()
    unit = SourceImportUnitSerializer()
    # the only field of this serializer that may be omitted
    note = serializers.CharField(required=False)
    # presumably the unparsed ingredient string as found in the source - TODO confirm
    original_text = serializers.CharField()
# One step of an imported recipe: the instruction text, the ingredients that
# belong to the step and a flag for showing the ingredients table.
class SourceImportStepSerializer(serializers.Serializer):
    instruction = serializers.CharField()
    ingredients = SourceImportIngredientSerializer(many=True)
    show_ingredients_table = serializers.BooleanField()
# Keyword suggested for an imported recipe.
# NOTE(review): the field set mirrors the dicts built by parse_keywords
# (label / name / optional id / import_keyword) - confirm that this is the
# only producer of this data.
class SourceImportKeywordSerializer(serializers.Serializer):
    # nullable: a keyword does not necessarily have an id
    id = serializers.IntegerField(allow_null=True)
    label = serializers.CharField()
    name = serializers.CharField()
    import_keyword = serializers.BooleanField()
# Property type referenced by an imported property: id and name.
class SourceImportPropertyTypeSerializer(serializers.Serializer):
    id = serializers.IntegerField()
    name = serializers.CharField()
# A single property of an imported recipe: one nested property type together
# with the amount for that type.
class SourceImportPropertySerializer(serializers.Serializer):
    property_type = SourceImportPropertyTypeSerializer(many=False)
    property_amount = serializers.FloatField()
# Recipe payload of a source/url import. Field order is kept exactly as
# declared because it determines the order of the serialized output.
class SourceImportRecipeSerializer(serializers.Serializer):
    steps = SourceImportStepSerializer(many=True)
    internal = serializers.BooleanField()
    source_url = serializers.URLField()
    name = serializers.CharField()
    description = serializers.CharField()
    servings = serializers.IntegerField()
    servings_text = serializers.CharField()
    # NOTE(review): unit of the two time fields is not visible here - confirm
    working_time = serializers.IntegerField()
    waiting_time = serializers.IntegerField()
    # a URL, not an uploaded file
    image = serializers.URLField()
    keywords = SourceImportKeywordSerializer(many=True)
    # declared as ListField(child=...) rather than many=True like keywords
    properties = serializers.ListField(child=SourceImportPropertySerializer())
# Response envelope of the recipe source/url import. Every field declares a
# default, so a caller only has to provide the keys relevant to its case
# (e.g. just error + msg for a failure).
class RecipeFromSourceResponseSerializer(serializers.Serializer):
    # the parsed recipe; defaults to None
    recipe = SourceImportRecipeSerializer(default=None)
    # list without a declared child type
    # NOTE(review): presumably candidate image URLs - confirm
    images = serializers.ListField(default=[])
    error = serializers.BooleanField(default=False)
    msg = serializers.CharField(max_length=1024, default='')
    # integer list
    # NOTE(review): presumably ids of existing recipes with the same source url - confirm
    duplicate = serializers.ListField(child=serializers.IntegerField(), default=[])
class ImportImageSerializer(serializers.Serializer):
image = serializers.ImageField()

View File

@@ -105,7 +105,7 @@ from cookbook.serializer import (AccessTokenSerializer, AutomationSerializer, Au
SupermarketSerializer, SyncLogSerializer, SyncSerializer,
UnitConversionSerializer, UnitSerializer, UserFileSerializer, UserPreferenceSerializer,
UserSerializer, UserSpaceSerializer, ViewLogSerializer, ImportImageSerializer,
LocalizationSerializer, ServerSettingsSerializer
LocalizationSerializer, ServerSettingsSerializer, RecipeFromSourceResponseSerializer
)
from cookbook.version_info import TANDOOR_VERSION
from cookbook.views.import_export import get_integration
@@ -1655,11 +1655,12 @@ class CustomAuthToken(ObtainAuthToken):
})
# TODO implement proper schema https://drf-spectacular.readthedocs.io/en/latest/customization.html#replace-views-with-openapiviewextension
class RecipeUrlImportView(APIView):
throttle_classes = [RecipeImportThrottle]
permission_classes = [CustomIsUser & CustomTokenHasReadWriteScope]
# TODO add response serializer
@extend_schema(request=RecipeFromSourceSerializer(many=False), responses=RecipeFromSourceResponseSerializer(many=False))
def post(self, request, *args, **kwargs):
"""
function to retrieve a recipe from a given url or source string
@@ -1671,6 +1672,8 @@ class RecipeUrlImportView(APIView):
"""
scrape = None
serializer = RecipeFromSourceSerializer(data=request.data)
response = {}
if serializer.is_valid():
if (b_pk := serializer.validated_data.get('bookmarklet', None)) and (
@@ -1680,21 +1683,17 @@ class RecipeUrlImportView(APIView):
bookmarklet.delete()
url = serializer.validated_data.get('url', None)
data = unquote(serializer.validated_data.get('data', None))
duplicate = False
if url:
# Check for existing recipes with provided url
existing_recipe = Recipe.objects.filter(source_url=url).first()
if existing_recipe:
duplicate = True
data = unquote(serializer.validated_data.get('data', ''))
if not url and not data:
return Response({'error': True, 'msg': _('Nothing to do.')}, status=status.HTTP_400_BAD_REQUEST)
response['error'] = True
response['msg'] = _('Nothing to do.')
return Response(RecipeFromSourceResponseSerializer().to_representation(response), status=status.HTTP_400_BAD_REQUEST)
elif url and not data:
if re.match('^(https?://)?(www\\.youtube\\.com|youtu\\.be)/.+$', url):
if validate_import_url(url):
# TODO new serializer
return Response({'recipe_json': get_from_youtube_scraper(url, request), 'recipe_images': [], 'duplicate': duplicate}, status=status.HTTP_200_OK)
if re.match('^(.)*/view/recipe/[0-9]+/[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$', url):
recipe_json = requests.get(
@@ -1713,6 +1712,7 @@ class RecipeUrlImportView(APIView):
filetype=pathlib.Path(recipe_json['image']).suffix),
name=f'{uuid.uuid4()}_{recipe.pk}{pathlib.Path(recipe_json["image"]).suffix}')
recipe.save()
# TODO new serializer
return Response({'link': request.build_absolute_uri(reverse('view_recipe', args={recipe.pk})), 'duplicate': duplicate}, status=status.HTTP_201_CREATED)
else:
try:
@@ -1724,16 +1724,19 @@ class RecipeUrlImportView(APIView):
).content
scrape = scrape_html(org_url=url, html=html, supported_only=False)
else:
return Response({'error': True, 'msg': _('Invalid Url')},
status=status.HTTP_400_BAD_REQUEST)
response['error'] = True
response['msg'] = _('Invalid Url')
return Response(RecipeFromSourceResponseSerializer().to_representation(response), status=status.HTTP_400_BAD_REQUEST)
except NoSchemaFoundInWildMode:
pass
except requests.exceptions.ConnectionError:
return Response({'error': True, 'msg': _('Connection Refused.')},
status=status.HTTP_400_BAD_REQUEST)
response['error'] = True
response['msg'] = _('Connection Refused.')
return Response(RecipeFromSourceResponseSerializer().to_representation(response), status=status.HTTP_400_BAD_REQUEST)
except requests.exceptions.MissingSchema:
return Response({'error': True, 'msg': _('Bad URL Schema.')},
status=status.HTTP_400_BAD_REQUEST)
response['error'] = True
response['msg'] = _('Bad URL Schema.')
return Response(RecipeFromSourceResponseSerializer().to_representation(response), status=status.HTTP_400_BAD_REQUEST)
else:
try:
data_json = json.loads(data)
@@ -1749,18 +1752,19 @@ class RecipeUrlImportView(APIView):
scrape = scrape_html(html=data, org_url=found_url, supported_only=False)
if scrape:
return Response({
'recipe_json': helper.get_from_scraper(scrape, request),
'recipe_images': list(dict.fromkeys(get_images_from_soup(scrape.soup, url))),
'duplicate': duplicate
},
status=status.HTTP_200_OK)
response['recipe'] = helper.get_from_scraper(scrape, request)
response['images'] = list(dict.fromkeys(get_images_from_soup(scrape.soup, url)))
response['duplicate'] = Recipe.objects.filter(source_url=url).values_list('id', flat=True).all()
return Response(RecipeFromSourceResponseSerializer(context={'request': request}).to_representation(response), status=status.HTTP_200_OK)
else:
return Response({'error': True, 'msg': _('No usable data could be found.')},
status=status.HTTP_400_BAD_REQUEST)
response['error'] = True
response['msg'] = _('No usable data could be found.')
return Response(RecipeFromSourceResponseSerializer().to_representation(response), status=status.HTTP_400_BAD_REQUEST)
else:
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
response['error'] = True
response['msg'] = serializer.errors
return Response(RecipeFromSourceResponseSerializer().to_representation(response), status=status.HTTP_400_BAD_REQUEST)
class ImageToRecipeView(APIView):