"""Configuration constants for the LLM-driven web scraper.

Defines the model, credentials source, target URL, content selector, page
limit and extraction prompt as module-level constants.
"""

import os

# LLM model identifier, in LiteLLM's "provider/model" notation.
# Any model supported by LiteLLM can be used, for example
# "groq/llama3-8b-8192" or "gemini-2.5-pro-exp-03-25".
# For the full list of supported models, see:
# https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
LLM_MODEL = "openai/gpt-4o-mini"

# API token for the LLM provider, read from the OPENAI_API_KEY environment
# variable. The value is None when that variable is not set.
# Never hard-code a key in this file: anything committed to version control
# must be treated as compromised and rotated.
# NOTE(review): if LLM_MODEL is switched to another provider, the environment
# variable read here presumably has to change as well -- confirm.
API_TOKEN = os.getenv("OPENAI_API_KEY")

# Base URL of the website to scrape.
# Listing sites that paginate can embed a "{page_number}" placeholder, e.g.:
# - Plumbers in Vancouver:
#   "https://www.yellowpages.ca/search/si/{page_number}/Plumbers/Vancouver+BC"
# - Restaurants in Montreal:
#   "https://www.yellowpages.ca/search/si/{page_number}/Restaurants/Montreal+QC"
# NOTE(review): the current URL has no "{page_number}" placeholder; confirm
# how the crawler paginates (or whether it should) for this site.
BASE_URL = "https://gentledental.ai/"

# CSS selector for the HTML element that holds the relevant content, so the
# whole page does not have to be sent to the LLM.
# Example for Yellow Pages listings: "[class^='listing_right_section']".
# NOTE(review): an empty selector presumably disables this filtering --
# verify against the crawler code.
CSS_SELECTOR = ""

# Maximum number of pages to crawl. Example: set to 5 to scrape 5 pages.
MAX_PAGES = 3

# Prompt telling the LLM which fields to extract for each business:
# name, address, website, phone number and a one-sentence description.
SCRAPER_INSTRUCTIONS = (
    "Extract all business information: 'name', 'address', 'website'"
    ", 'phone number' and a one-sentence 'description' from the following content."
)