mirror of
https://github.com/TandoorRecipes/recipes.git
synced 2026-01-05 06:08:46 -05:00
Call scrape_me first when scraping from url
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from json import JSONDecodeError
|
||||
from recipe_scrapers import SCRAPERS, get_host_name
|
||||
from recipe_scrapers import SCRAPERS
|
||||
from recipe_scrapers._factory import SchemaScraperFactory
|
||||
from recipe_scrapers._schemaorg import SchemaOrg
|
||||
|
||||
@@ -15,13 +15,7 @@ SCRAPERS.update(CUSTOM_SCRAPERS)
|
||||
|
||||
|
||||
def text_scraper(text, url=None):
|
||||
domain = None
|
||||
if url:
|
||||
domain = get_host_name(url)
|
||||
if domain in SCRAPERS:
|
||||
scraper_class = SCRAPERS[domain]
|
||||
else:
|
||||
scraper_class = SchemaScraperFactory.SchemaScraper
|
||||
scraper_class = SchemaScraperFactory.SchemaScraper
|
||||
|
||||
class TextScraper(scraper_class):
|
||||
def __init__(
|
||||
|
||||
Reference in New Issue
Block a user