This commit is contained in:
smilerz
2024-07-09 08:01:39 -05:00
parent c4ff29beda
commit a02582e9f8
7 changed files with 28 additions and 145 deletions

View File

@@ -1,68 +0,0 @@
import json
from recipe_scrapers._abstract import AbstractScraper
class CooksIllustrated(AbstractScraper):
    """Scraper for the Cook's Illustrated family of sites.

    ``title`` and ``image`` are delegated to ``self.schema``; every other
    field is read from a JSON document embedded in the page, which is parsed
    lazily by :meth:`get_recipe` and cached on ``self.recipe``.
    """

    # Cache for the embedded recipe document. Defined at class level so the
    # accessors below work even when the instance was built by code that
    # never assigns ``self.recipe`` (previously that raised AttributeError).
    recipe = None

    @classmethod
    def host(cls, site='cooksillustrated'):
        """Return the ``<site>.com`` host for a supported site key.

        Returns ``None`` when ``site`` is not one of the supported keys.
        """
        return {
            'cooksillustrated': f"{site}.com",
            'americastestkitchen': f"{site}.com",
            'cookscountry': f"{site}.com",
        }.get(site)

    def _recipe_data(self):
        """Return the cached recipe document, loading it on first use."""
        if not self.recipe:
            self.get_recipe()
        return self.recipe

    def title(self):
        """Return the recipe title as reported by ``self.schema``."""
        return self.schema.title()

    def image(self):
        """Return the recipe image as reported by ``self.schema``."""
        return self.schema.image()

    def total_time(self):
        """Return the ``recipeTimeNote`` value of the recipe document."""
        return self._recipe_data()['recipeTimeNote']

    def yields(self):
        """Return the ``yields`` value of the recipe document."""
        return self._recipe_data()['yields']

    def ingredients(self):
        """Return one formatted string per ingredient, across all groups.

        Each string is ``"<qty> <measurement> <title><postText>"``; missing
        (falsy) parts are rendered as empty strings and surrounding
        whitespace is stripped so an ingredient without a quantity does not
        start with blanks.
        """
        items = []
        for group in self._recipe_data()['ingredientGroups']:
            items.extend(group['fields']['recipeIngredientItems'])
        return [
            "{} {} {}{}".format(
                item['fields']['qty'] or '',
                item['fields']['measurement'] or '',
                item['fields']['ingredient']['fields']['title'] or '',
                item['fields']['postText'] or '',
            ).strip()
            for item in items
        ]

    def instructions(self):
        """Return the instructions as a single newline-separated string.

        The optional ``headnote`` (prefixed with ``Note: ``) and optional
        ``whyThisWorks`` text come first, followed by the content of every
        instruction step. Optional parts are skipped when missing or empty,
        which avoids stray blank lines and a ``TypeError`` from joining
        ``None``.
        """
        recipe = self._recipe_data()
        parts = []
        headnote = recipe.get('headnote')
        if headnote:
            parts.append('Note: ' + headnote)
        why_this_works = recipe.get('whyThisWorks')
        if why_this_works:
            parts.append(why_this_works)
        parts.extend(
            step['fields']['content'] for step in recipe['instructions']
        )
        return "\n".join(parts)

    def nutrients(self):
        """Not available for these sites; always raises NotImplementedError."""
        raise NotImplementedError("This should be implemented.")

    def get_recipe(self):
        """Parse the embedded JSON document and cache it on ``self.recipe``.

        Reads the first element in ``self.soup`` whose ``type`` attribute is
        ``application/json`` and stores the first entry found under
        ``props.initialState.content.documents``.
        """
        payload = json.loads(self.soup.find(type='application/json').string)
        documents = payload['props']['initialState']['content']['documents']
        first_key = list(documents)[0]
        self.recipe = documents[first_key]

View File

@@ -1,43 +0,0 @@
from json import JSONDecodeError
from bs4 import BeautifulSoup
from recipe_scrapers import SCRAPERS, get_host_name
from recipe_scrapers._factory import SchemaScraperFactory
from recipe_scrapers._schemaorg import SchemaOrg
from .cooksillustrated import CooksIllustrated
# Map every host served by the CooksIllustrated scraper to that class, then
# register the mapping with recipe_scrapers' global SCRAPERS table.
CUSTOM_SCRAPERS = {
    CooksIllustrated.host(site=site_key): CooksIllustrated
    for site_key in ("cooksillustrated", "americastestkitchen", "cookscountry")
}
SCRAPERS.update(CUSTOM_SCRAPERS)
def text_scraper(text, url=None):
    """Build a scraper instance from already-downloaded HTML.

    The scraper class registered in ``SCRAPERS`` for the host of ``url`` is
    used when there is one; otherwise the generic schema scraper is used.

    Args:
        text: HTML source of the recipe page.
        url: Optional URL the HTML came from; only used to pick the scraper
            class and stored on the instance.

    Returns:
        An instance of a ``TextScraper`` subclass of the selected scraper.
    """
    host_name = get_host_name(url) if url else None
    base_class = SCRAPERS.get(host_name, SchemaScraperFactory.SchemaScraper)

    class TextScraper(base_class):
        """Scraper initialised from HTML text without calling the base initialiser."""

        def __init__(self, html=None, url=None):
            self.supported_only = False
            self.meta_http_equiv = False
            self.soup = BeautifulSoup(html, "html.parser")
            self.url = url
            self.recipe = None
            # Schema parsing is best-effort: on failure ``self.schema`` is
            # simply left unset.
            try:
                self.schema = SchemaOrg(html)
            except (JSONDecodeError, AttributeError):
                pass

    return TextScraper(html=text, url=url)