# Source: GitHub gist zone559/9890c5603128b0563b1f3c8f7514d2d8
# (by @zone559, created August 30, 2025 21:21).
import requests
import re
from urllib.parse import urljoin
def is_user_profile(url, timeout=30):
    """Report whether the page at ``url`` looks like a user profile.

    The page is downloaded and its HTML is scanned for a fixed set of markup
    fragments that are treated as profile indicators; one hit is enough.

    Args:
        url: Address of the page to inspect.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on an unresponsive host.

    Returns:
        True if any indicator pattern occurs in the response body. False if
        none occurs, or if the page could not be fetched for any reason
        (this function never raises).
    """
    profile_indicators = (
        r'class="top-user-credentials"',
        r'class="top-user-avatar"',
        r'data-route="[^"]+"',  # data-route attribute often contains username
        r'Files by [^<]+</a>',  # "Files by username" text
        r'<div class="user-meta">',
        r'data-text="following-count"',
        r'data-text="followers-count"',
    )

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        html_content = response.text
    except Exception:
        # Deliberately best-effort: any failure while fetching (network
        # error, HTTP error status, timeout, malformed URL, ...) means
        # "cannot confirm this is a profile", so callers get a plain False.
        return False

    # If any of these patterns are found, it's likely a user profile
    return any(re.search(pattern, html_content) for pattern in profile_indicators)
def extract_username(url, timeout=30):
    """Return the username associated with the page at ``url``.

    Three sources are tried in order and the first hit wins:

    1. a ``username: "..."`` assignment in the page source,
    2. the value of the first ``data-route`` attribute in the HTML,
    3. the last segment of the URL's path.

    Args:
        url: Address of the page to inspect.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on an unresponsive host.

    Returns:
        The username string. Failures are reported in-band rather than
        raised: ``"Username not found"`` when no source yields a name,
        ``"Error making request: ..."`` when the download fails, and
        ``"Error: ..."`` for any other exception.
    """
    # Local import: the file-level imports only bring in ``urljoin``.
    from urllib.parse import urlparse

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        page = response.text

        # Method 1 (JavaScript data) and method 2 (data-route attribute).
        for pattern in (r'username:\s*"([^"]+)"', r'data-route="([^"]+)"'):
            match = re.search(pattern, page)
            if match:
                return match.group(1)

        # Method 3: fall back to the URL itself. Only the path component is
        # inspected, so a query string or fragment never leaks into the name
        # and a URL without a path does not yield the host name.
        path_username = urlparse(url).path.rstrip('/').split('/')[-1]
        if path_username:
            return path_username

        return "Username not found"
    except requests.RequestException as e:
        return f"Error making request: {e}"
    except Exception as e:
        return f"Error: {e}"
def extract_next_page_url(html_content, base_url):
    """Extract the absolute URL of the next page from pagination HTML.

    Args:
        html_content: HTML source of the current page.
        base_url: URL the page was loaded from; a relative link is resolved
            against it.

    Returns:
        The absolute next-page URL, or None when the page contains no
        ``pagination-next`` link.
    """
    # Local import keeps this block independent of the file-level imports.
    from html import unescape

    match = re.search(
        r'<li class="pagination-next"><a data-pagination="next" href="([^"]+)"',
        html_content,
    )
    if match is None:
        return None

    # The href is copied from HTML source, where "&" in a query string is
    # normally written as "&amp;". Decode character references so the
    # resulting URL carries the real separators. Only semicolon-terminated
    # references are decoded, so a raw "&name=value" pair is left untouched.
    href = re.sub(
        r'&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);',
        lambda ref: unescape(ref.group(0)),
        match.group(1),
    )
    return urljoin(base_url, href)
def extract_image_urls_from_page(html_content):
    """Collect the link target of every media container in ``html_content``.

    A media container is an anchor written exactly as
    ``<a href="..." class="image-container --media">``.

    Returns:
        The ``href`` values in document order, exactly as written in the
        page; duplicates are kept. An empty list when nothing matches.
    """
    media_anchor = re.compile(r'<a href="([^"]+)" class="image-container --media">')
    return [hit.group(1) for hit in media_anchor.finditer(html_content)]
def extract_all_image_urls(start_url, max_pages=None, timeout=30):
    """Collect image/video links from every page of a user profile.

    Starting at ``start_url``, each page is downloaded, its media links are
    gathered, and the pagination "next" link is followed until there is none.

    Args:
        start_url: URL of the first page of the profile.
        max_pages: Upper bound on the number of pages to download, or None
            to follow the pagination to its end.
        timeout: Seconds to wait for each page before giving up. Without a
            timeout ``requests`` waits indefinitely on an unresponsive host.

    Returns:
        The links found, in page order, exactly as written in the HTML.
        An empty list when ``start_url`` is not a user profile. If a request
        fails part-way through, the error is printed and the links gathered
        so far are returned.
    """
    # First check if it's a user profile
    if not is_user_profile(start_url):
        print("This is not a user profile. Skipping pagination extraction.")
        return []

    username = extract_username(start_url)
    print(f"User profile detected: {username}")

    all_urls = []
    visited = set()
    current_url = start_url
    page_count = 0

    try:
        while current_url and (max_pages is None or page_count < max_pages):
            # A "next" link that points back at a page already processed
            # would otherwise keep this loop running forever.
            if current_url in visited:
                print("Pagination loop detected. Stopping.")
                break
            visited.add(current_url)

            response = requests.get(current_url, timeout=timeout)
            response.raise_for_status()

            print(f"Processing page {page_count + 1}: {current_url}")
            all_urls.extend(extract_image_urls_from_page(response.text))

            next_page_url = extract_next_page_url(response.text, current_url)
            if not next_page_url:
                print("No more pages found.")
                break

            current_url = next_page_url
            page_count += 1
    except requests.RequestException as e:
        print(f"Error making request: {e}")
    except Exception as e:
        print(f"Error: {e}")

    return all_urls
def get_full_urls(relative_urls, base_domain="https://imagepond.net"):
    """Resolve every entry of ``relative_urls`` against ``base_domain``.

    Args:
        relative_urls: Iterable of link strings.
        base_domain: Base URL each link is joined onto.

    Returns:
        A new list with one joined URL per input entry, in the same order.
    """

    def resolve(link):
        return urljoin(base_domain, link)

    return list(map(resolve, relative_urls))
# Example usage: inspect one URL and list the media links of the profile.
url = "https://imagepond.net/ap000"

# Check if it's a user profile and extract accordingly
if is_user_profile(url):
    username = extract_username(url)
    print(f"Username: {username}")

    # Extract URLs with pagination
    relative_urls = extract_all_image_urls(url)
    full_urls = get_full_urls(relative_urls)

    print(f"\nFound {len(full_urls)} image/video URLs:")
    for i, full_url in enumerate(full_urls, 1):
        print(f"{i}. {full_url}")
else:
    print("This URL is not a user profile. Only extracting username if available.")
    username = extract_username(url)
    # extract_username reports failures in-band as strings ("Username not
    # found", "Error making request: ...", "Error: ..."). None of them is a
    # username, so none of them may be printed as one.
    lookup_failed = username == "Username not found" or username.startswith(
        ("Error making request: ", "Error: ")
    )
    if lookup_failed:
        print("No username found.")
    else:
        print(f"Username: {username}")
# (GitHub page footer omitted: sign-in prompt for commenting on the gist.)