import requests
import re
from urllib.parse import urljoin


def is_user_profile(url):
    """Check if the URL is a user profile"""
    try:
        response = requests.get(url)
        response.raise_for_status()

        # Check for user profile indicators in the HTML
        profile_indicators = [
            r'class="top-user-credentials"',
            r'class="top-user-avatar"',
            r'data-route="[^"]+"',  # data-route attribute often contains username
            r'Files by [^<]+</a>',  # "Files by username" text
            r'<div class="user-meta">',
            r'data-text="following-count"',
            r'data-text="followers-count"'
        ]

        # If any of these patterns are found, it's likely a user profile
        html_content = response.text
        for pattern in profile_indicators:
            if re.search(pattern, html_content):
                return True
        return False
    except requests.RequestException:
        return False
    except Exception:
        return False


def extract_username(url):
    """Extract username from user profile"""
    try:
        response = requests.get(url)
        response.raise_for_status()

        # Method 1: Look for username in JavaScript data
        username_pattern = r'username:\s*"([^"]+)"'
        match = re.search(username_pattern, response.text)
        if match:
            return match.group(1)

        # Method 2: Look for username in data-route attribute
        route_pattern = r'data-route="([^"]+)"'
        match = re.search(route_pattern, response.text)
        if match:
            return match.group(1)

        # Method 3: Extract from URL path
        path_username = url.rstrip('/').split('/')[-1]
        if path_username and not path_username.startswith('?'):
            return path_username

        return "Username not found"
    except requests.RequestException as e:
        return f"Error making request: {e}"
    except Exception as e:
        return f"Error: {e}"


def extract_next_page_url(html_content, base_url):
    """Extract the next page URL from pagination HTML"""
    next_page_pattern = r'<li class="pagination-next"><a data-pagination="next" href="([^"]+)"'
    match = re.search(next_page_pattern, html_content)
    if match:
        next_page_relative = match.group(1)
        return urljoin(base_url, next_page_relative)
    return None


def extract_image_urls_from_page(html_content):
    """Extract image/video URLs from a single page's HTML content"""
    pattern = r'<a href="([^"]+)" class="image-container --media">'
    return re.findall(pattern, html_content)


def extract_all_image_urls(start_url, max_pages=None):
    """Extract image/video URLs from all pages with pagination (only for user profiles)"""
    # First check if it's a user profile
    if not is_user_profile(start_url):
        print("This is not a user profile. Skipping pagination extraction.")
        return []

    username = extract_username(start_url)
    print(f"User profile detected: {username}")

    all_urls = []
    current_url = start_url
    page_count = 0

    try:
        while current_url and (max_pages is None or page_count < max_pages):
            response = requests.get(current_url)
            response.raise_for_status()

            print(f"Processing page {page_count + 1}: {current_url}")

            page_urls = extract_image_urls_from_page(response.text)
            all_urls.extend(page_urls)

            next_page_url = extract_next_page_url(response.text, current_url)
            if not next_page_url:
                print("No more pages found.")
                break

            current_url = next_page_url
            page_count += 1
    except requests.RequestException as e:
        print(f"Error making request: {e}")
    except Exception as e:
        print(f"Error: {e}")

    return all_urls


def get_full_urls(relative_urls, base_domain="https://imagepond.net"):
    """Convert relative URLs to full URLs"""
    return [urljoin(base_domain, url) for url in relative_urls]


# Example usage
url = "https://imagepond.net/ap000"

# Check if it's a user profile and extract accordingly
if is_user_profile(url):
    username = extract_username(url)
    print(f"Username: {username}")

    # Extract URLs with pagination
    relative_urls = extract_all_image_urls(url)
    full_urls = get_full_urls(relative_urls)

    print(f"\nFound {len(full_urls)} image/video URLs:")
    for i, full_url in enumerate(full_urls, 1):
        print(f"{i}. {full_url}")
else:
    print("This URL is not a user profile. Only extracting username if available.")
    username = extract_username(url)
    if username != "Username not found":
        print(f"Username: {username}")
    else:
        print("No username found.")