Created
June 25, 2025 22:58
-
-
Save JGalego/818d88d91dd6fc9ed936879dccae16cf to your computer and use it in GitHub Desktop.
A simple recon demo using ScrapeGraphAI with Groq 🕷️
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /// script
# requires-python = ">=3.7,<3.12"
# dependencies = [
#   "langchain-groq >= 0.3.4",
#   "scrapegraphai >= 1.57.0",
# ]
# ///
# NOTE: the upper bound is spelled "<3.12" rather than "<=3.11" because, under
# PEP 440 version matching, "<=3.11" rejects every 3.11 patch release after
# 3.11.0 (e.g. 3.11.9 > 3.11).
# NOTE(review): the pinned dependencies may require a higher minimum Python
# than 3.7 -- confirm against their published metadata.
"""
Simple recon script using scrapegraphai with Groq

The script is interactive and runs top to bottom:

1. The LLM settings are read from the ``GROQ_MODEL`` and ``GROQ_API_KEY``
   environment variables; if ``GROQ_API_KEY`` is unset or empty, the key is
   requested on the terminal without echo.
2. The target URL is requested on standard input.
3. A ``SmartScraperGraph`` is run against that URL with the ``INSTRUCTIONS``
   prompt and the result is printed as indented JSON.
"""

# Standard imports
import os
import json
from getpass import getpass

# Library imports
from scrapegraphai.graphs import SmartScraperGraph

# Define the configuration for the scraping pipeline
graph_config = {
    'llm': {
        # The model can be overridden through the environment.
        'model': os.getenv('GROQ_MODEL', "groq/meta-llama/llama-4-scout-17b-16e-instruct"),
        # `or` (not a getenv default) so an empty variable also triggers the prompt.
        'api_key': os.getenv('GROQ_API_KEY') or getpass("API Key: "),
    },
    'verbose': True,
    'headless': True,
}

# Define scraper instructions
INSTRUCTIONS = """You are an expert penetration tester participating in a AI red teaming exercise.
Your task is to extract useful information from the webpage, including a description of what the
company does, founders, and social media links."""

# Ask for the target and reject blank input up front, so the failure is an
# explicit message instead of an error raised from inside the pipeline.
source_url = input("Enter the URL to scrape: ").strip()
if not source_url:
    raise SystemExit("No URL provided; nothing to scrape.")

# Initialize the scraper
graph = SmartScraperGraph(
    prompt=INSTRUCTIONS,
    source=source_url,
    config=graph_config,
)

# Run the pipeline
result = graph.run()

# Pretty print the result
print(json.dumps(result, indent=4))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment