mirror of
https://github.com/TandoorRecipes/recipes.git
synced 2026-01-03 21:37:49 -05:00
rebase
This commit is contained in:
@@ -1,68 +0,0 @@
|
||||
import json
|
||||
from recipe_scrapers._abstract import AbstractScraper
|
||||
|
||||
|
||||
class CooksIllustrated(AbstractScraper):
|
||||
@classmethod
|
||||
def host(cls, site='cooksillustrated'):
|
||||
return {
|
||||
'cooksillustrated': f"{site}.com",
|
||||
'americastestkitchen': f"{site}.com",
|
||||
'cookscountry': f"{site}.com",
|
||||
}.get(site)
|
||||
|
||||
def title(self):
|
||||
return self.schema.title()
|
||||
|
||||
def image(self):
|
||||
return self.schema.image()
|
||||
|
||||
def total_time(self):
|
||||
if not self.recipe:
|
||||
self.get_recipe()
|
||||
return self.recipe['recipeTimeNote']
|
||||
|
||||
def yields(self):
|
||||
if not self.recipe:
|
||||
self.get_recipe()
|
||||
return self.recipe['yields']
|
||||
|
||||
def ingredients(self):
|
||||
if not self.recipe:
|
||||
self.get_recipe()
|
||||
ingredients = []
|
||||
for group in self.recipe['ingredientGroups']:
|
||||
ingredients += group['fields']['recipeIngredientItems']
|
||||
return [
|
||||
"{} {} {}{}".format(
|
||||
i['fields']['qty'] or '',
|
||||
i['fields']['measurement'] or '',
|
||||
i['fields']['ingredient']['fields']['title'] or '',
|
||||
i['fields']['postText'] or ''
|
||||
)
|
||||
for i in ingredients
|
||||
]
|
||||
|
||||
def instructions(self):
|
||||
if not self.recipe:
|
||||
self.get_recipe()
|
||||
if self.recipe.get('headnote', False):
|
||||
i = ['Note: ' + self.recipe.get('headnote', '')]
|
||||
else:
|
||||
i = []
|
||||
return "\n".join(
|
||||
i
|
||||
+ [self.recipe.get('whyThisWorks', '')]
|
||||
+ [
|
||||
instruction['fields']['content']
|
||||
for instruction in self.recipe['instructions']
|
||||
]
|
||||
)
|
||||
|
||||
def nutrients(self):
|
||||
raise NotImplementedError("This should be implemented.")
|
||||
|
||||
def get_recipe(self):
|
||||
j = json.loads(self.soup.find(type='application/json').string)
|
||||
name = list(j['props']['initialState']['content']['documents'])[0]
|
||||
self.recipe = j['props']['initialState']['content']['documents'][name]
|
||||
@@ -1,43 +0,0 @@
|
||||
from json import JSONDecodeError
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from recipe_scrapers import SCRAPERS, get_host_name
|
||||
from recipe_scrapers._factory import SchemaScraperFactory
|
||||
from recipe_scrapers._schemaorg import SchemaOrg
|
||||
|
||||
from .cooksillustrated import CooksIllustrated
|
||||
|
||||
CUSTOM_SCRAPERS = {
|
||||
CooksIllustrated.host(site="cooksillustrated"): CooksIllustrated,
|
||||
CooksIllustrated.host(site="americastestkitchen"): CooksIllustrated,
|
||||
CooksIllustrated.host(site="cookscountry"): CooksIllustrated,
|
||||
}
|
||||
SCRAPERS.update(CUSTOM_SCRAPERS)
|
||||
|
||||
|
||||
def text_scraper(text, url=None):
|
||||
domain = None
|
||||
if url:
|
||||
domain = get_host_name(url)
|
||||
if domain in SCRAPERS:
|
||||
scraper_class = SCRAPERS[domain]
|
||||
else:
|
||||
scraper_class = SchemaScraperFactory.SchemaScraper
|
||||
|
||||
class TextScraper(scraper_class):
|
||||
def __init__(
|
||||
self,
|
||||
html=None,
|
||||
url=None,
|
||||
):
|
||||
self.supported_only = False
|
||||
self.meta_http_equiv = False
|
||||
self.soup = BeautifulSoup(html, "html.parser")
|
||||
self.url = url
|
||||
self.recipe = None
|
||||
try:
|
||||
self.schema = SchemaOrg(html)
|
||||
except (JSONDecodeError, AttributeError):
|
||||
pass
|
||||
|
||||
return TextScraper(url=url, html=text)
|
||||
Reference in New Issue
Block a user