Skip to content

Instantly share code, notes, and snippets.

@abdalrohman
Last active October 18, 2024 21:50
Show Gist options
  • Select an option

  • Save abdalrohman/dbf61b6104570484f0be2631446b0e71 to your computer and use it in GitHub Desktop.

Select an option

Save abdalrohman/dbf61b6104570484f0be2631446b0e71 to your computer and use it in GitHub Desktop.
Pixabay API Client with Chaining Strategy Pattern
"""
Pixabay API Client with Chaining Strategy Pattern
A modern, type-safe client for interacting with the Pixabay API using a fluent interface.
Supports method chaining for building search queries and downloading media.
Author: M.Abdulrahman Alnaseer
GitHub: abdalrohman (github.com)
License: MIT
"""
from __future__ import annotations
import enum
import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Literal, Self, TypedDict
from urllib.parse import quote, urlparse
import requests
from requests.exceptions import RequestException
from requests.structures import CaseInsensitiveDict
from tenacity import retry, stop_after_attempt, wait_exponential
# Module-level logger named after this module; no handlers or levels are
# configured by this statement.
logger = logging.getLogger(__name__)
# Error classes for handling specific Pixabay API-related errors
class PixabayAPIError(Exception):
    """Root of this module's exception hierarchy.

    Catching this type also catches the more specific API-key, no-results
    and download errors derived from it.
    """
class APIKeyError(PixabayAPIError):
    """Signals that no usable Pixabay API key was supplied."""
class NoResultsError(PixabayAPIError):
    """Signals that a search completed but matched nothing."""
class DownloadError(PixabayAPIError):
    """Signals that fetching a media file failed."""
# Type definitions: closed sets of string values used by the query builders
# and the client.
# Which kind of media a search targets; "videos" selects the "videos/" endpoint.
MediaType = Literal["images", "videos"]
# Preferred rendition when downloading; mapped to an image URL field or to a
# key of a video hit's "videos" dict.
DownloadSize = Literal["large", "medium", "small", "tiny"]
# Value sent as the "order" search parameter.
OrderType = Literal["popular", "latest"]
# Value sent as the "image_type" search parameter.
ImageType = Literal["all", "photo", "illustration", "vector"]
# Value sent as the "video_type" search parameter.
VideoType = Literal["all", "film", "animation"]
# Value sent as the "orientation" search parameter.
Orientation = Literal["all", "horizontal", "vertical"]
@dataclass(frozen=True)
class APIConfig:
    """Immutable constants describing the Pixabay API.

    The fields have defaults, so they are readable both on the class
    (``APIConfig.MAX_PER_PAGE``) and on instances.
    """

    # Root endpoint; the video endpoint is this URL with "videos/" appended.
    BASE_URL: str = "https://pixabay.com/api/"

    # Inclusive bounds enforced on the "per_page" search parameter.
    MIN_PER_PAGE: int = 3
    MAX_PER_PAGE: int = 200
class ImageResponse(TypedDict):
    """Shape of a single image hit as typed by this client."""

    # Identity and classification
    id: int
    pageURL: str
    type: str
    tags: str

    # Preview rendition (used for the "small" and "tiny" download sizes)
    previewURL: str
    previewWidth: int
    previewHeight: int

    # Web-format rendition (used for the "medium" download size)
    webformatURL: str
    webformatWidth: int
    webformatHeight: int

    # Large rendition (used for the "large" download size) and image metrics
    largeImageURL: str
    imageWidth: int
    imageHeight: int
    imageSize: int

    # Engagement counters
    views: int
    downloads: int
    collections: int
    likes: int
    comments: int

    # Uploader
    user_id: int
    user: str
    userImageURL: str
class VideoResponse(TypedDict):
    """Shape of a single video hit as typed by this client."""

    # Identity and classification
    id: int
    pageURL: str
    type: str
    tags: str
    duration: int

    # Renditions keyed by size name ("large", "medium", "small", "tiny").
    # Each entry holds a "url" string plus further metadata; the Pixabay API
    # documents integer width/height/size fields there, so the inner values
    # are not all strings.
    videos: dict[str, dict[str, str | int]]

    # Engagement counters
    views: int
    downloads: int
    likes: int
    comments: int

    # Uploader
    user_id: int
    user: str
    userImageURL: str
class APIResponse(TypedDict):
    """Normalised search result handed back by the client."""

    # Counters copied from the raw response (0 when absent).
    total: int
    totalHits: int

    # Hits of the returned page, each typed as an image or a video record.
    hits: list[ImageResponse | VideoResponse]
class Language(enum.Enum):
    """Language codes selectable for a search.

    Each member's value is the two-letter code sent as the ``lang``
    request parameter.
    """

    CS = "cs"
    DA = "da"
    DE = "de"
    EN = "en"
    ES = "es"
    FR = "fr"
    ID = "id"
    IT = "it"
    HU = "hu"
    NL = "nl"
    NO = "no"
    PL = "pl"
    PT = "pt"
    RO = "ro"
    SK = "sk"
    FI = "fi"
    SV = "sv"
    TR = "tr"
    VI = "vi"
    TH = "th"
    BG = "bg"
    RU = "ru"
    EL = "el"
    JA = "ja"
    KO = "ko"
    ZH = "zh"
class Category(enum.Enum):
    """Content categories selectable for a search.

    Each member's value is the string sent as the ``category`` request
    parameter.
    """

    BACKGROUNDS = "backgrounds"
    FASHION = "fashion"
    NATURE = "nature"
    SCIENCE = "science"
    EDUCATION = "education"
    FEELINGS = "feelings"
    HEALTH = "health"
    PEOPLE = "people"
    RELIGION = "religion"
    PLACES = "places"
    ANIMALS = "animals"
    INDUSTRY = "industry"
    COMPUTER = "computer"
    FOOD = "food"
    SPORTS = "sports"
    TRANSPORTATION = "transportation"
    TRAVEL = "travel"
    BUILDINGS = "buildings"
    BUSINESS = "business"
    MUSIC = "music"
class Color(enum.Enum):
    """Colour filters selectable for an image search.

    Each member's value is the string sent in the ``colors`` request
    parameter.
    """

    GRAYSCALE = "grayscale"
    TRANSPARENT = "transparent"
    RED = "red"
    ORANGE = "orange"
    YELLOW = "yellow"
    GREEN = "green"
    TURQUOISE = "turquoise"
    BLUE = "blue"
    LILAC = "lilac"
    PINK = "pink"
    WHITE = "white"
    GRAY = "gray"
    BLACK = "black"
    BROWN = "brown"
class SearchQuery:
"""Builder class for constructing Pixabay API search queries"""
def __init__(self, api_instance: PixabayAPI, params: dict[str, Any] | None = None):
self._api = api_instance
self._params = params or {
"lang": Language.EN.value,
"per_page": APIConfig.MAX_PER_PAGE,
"safesearch": False,
"order": "popular",
}
self._media_type: MediaType = "images"
self._download_dir: str | None = None
self._max_downloads: int | None = None
def with_query(self, query: str) -> Self:
"""Set the search query string"""
self._params["q"] = quote(query, safe="")
return self
def with_language(self, language: Language) -> Self:
"""Set the language for the search"""
self._params["lang"] = language.value
return self
def with_category(self, category: Category) -> Self:
"""Set the category for the search"""
self._params["category"] = category.value
return self
def with_page(self, page: int) -> Self:
"""Set the page number for pagination"""
if page < 1:
raise ValueError("Page number must be greater than 0")
self._params["page"] = page
return self
def with_per_page(self, per_page: int) -> Self:
"""Set the number of results per page"""
if not (APIConfig.MIN_PER_PAGE <= per_page <= APIConfig.MAX_PER_PAGE):
raise ValueError(f"per_page must be between {APIConfig.MIN_PER_PAGE} " f"and {APIConfig.MAX_PER_PAGE}")
self._params["per_page"] = per_page
return self
def with_min_width(self, width: int) -> Self:
"""Set the minimum width for media items"""
if width < 0:
raise ValueError("Width must be non-negative")
self._params["min_width"] = width
return self
def with_min_height(self, height: int) -> Self:
"""Set the minimum height for media items"""
if height < 0:
raise ValueError("Height must be non-negative")
self._params["min_height"] = height
return self
def with_editors_choice(self, editors_choice: bool = True) -> Self:
"""Filter for editor's choice items"""
self._params["editors_choice"] = str(editors_choice).lower()
return self
def with_safesearch(self, safesearch: bool = True) -> Self:
"""Enable or disable safe search"""
self._params["safesearch"] = str(safesearch).lower()
return self
def with_order(self, order: OrderType) -> Self:
"""Set the order of results"""
self._params["order"] = order
return self
def for_images(self) -> ImageSearchQuery:
"""Switch to image-specific search parameters"""
self._media_type = "images"
return ImageSearchQuery(self._api, self._params)
def for_videos(self) -> VideoSearchQuery:
"""Switch to video-specific search parameters"""
self._media_type = "videos"
return VideoSearchQuery(self._api, self._params)
def download_to(self, save_dir: str, max_items: int | None = None) -> Self:
"""Set the directory to save downloaded files and optionally limit the number of downloads"""
self._download_dir = save_dir
self._max_downloads = max_items
return self
def execute(self) -> APIResponse | list[Path]:
"""Execute the search query and optionally download results"""
results = self._api.search(self._params["q"], self._media_type, **self._params)
if self._download_dir:
return self._download_results(results)
return results
def _download_results(self, results: APIResponse) -> list[Path]:
"""Download the search results"""
items_to_download = results["hits"]
if self._max_downloads:
items_to_download = items_to_download[: self._max_downloads]
downloaded_paths = []
for item in items_to_download:
try:
saved_path = self._api.download(item_data=item, save_dir=self._download_dir)
downloaded_paths.append(saved_path)
logger.info(f"Downloaded successfully: {saved_path}")
except DownloadError as e:
logger.exception(f"Failed to download item: {e}")
return downloaded_paths
class ImageSearchQuery(SearchQuery):
    """Image-specific search query builder"""

    def __init__(self, api_instance: PixabayAPI, params: dict[str, Any]):
        """
        Create an image query that continues from existing parameters.

        :param api_instance: Client used to run the search and the downloads
        :param params: Request parameters collected so far
        """
        super().__init__(api_instance, params)
        self._media_type = "images"

    def with_image_type(self, image_type: ImageType) -> Self:
        """Set the type of images to search for"""
        self._params["image_type"] = image_type
        return self

    def with_orientation(self, orientation: Orientation) -> Self:
        """Set the orientation of images"""
        self._params["orientation"] = orientation
        return self

    def with_color(self, color: Color, *more_colors: Color) -> Self:
        """
        Set the colour filter(s) to search for.

        :param color: Colour to filter by
        :param more_colors: Optional further colours; all values are sent
            together as one comma-separated ``colors`` parameter
        """
        self._params["colors"] = ",".join(c.value for c in (color, *more_colors))
        return self
class VideoSearchQuery(SearchQuery):
    """Query builder that adds the video-only search options."""

    def __init__(self, api_instance: PixabayAPI, params: dict[str, Any]):
        """Continue from ``params`` and target the video endpoint."""
        super().__init__(api_instance, params)
        self._media_type = "videos"

    def with_video_type(self, video_type: VideoType) -> Self:
        """Restrict results to one kind of video; returns the builder."""
        self._params["video_type"] = video_type
        return self
class PixabayAPI:
    """A modern client for interacting with the Pixabay API with chaining support.

    The client owns one ``requests.Session``. Call ``close()`` or use the
    client as a context manager to release it.
    """

    # Rendition names from largest to smallest; used to fall back to the next
    # smaller video when the preferred size has no URL.
    _SIZE_ORDER: tuple[DownloadSize, ...] = ("large", "medium", "small", "tiny")
    # Timeout in seconds passed to the streaming download request.
    _DOWNLOAD_TIMEOUT: int = 30

    def __init__(self, api_key: str | None = None, download_size: DownloadSize = "large"):
        """
        Initialize the Pixabay API client.

        :param api_key: Your Pixabay API key; falls back to the
            ``PIXABAY_API_KEY`` environment variable when omitted
        :param download_size: Preferred download size for media
        :raises APIKeyError: If no API key is provided
        """
        self.api_key = api_key or os.getenv("PIXABAY_API_KEY")
        if not self.api_key:
            raise APIKeyError("PIXABAY_API_KEY environment variable is not set")
        self.download_size = download_size
        self.config = APIConfig()
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": "PixabayAPI/1.0",
            }
        )

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def __enter__(self) -> Self:
        """Return the client itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the session when the ``with`` block ends."""
        self.close()

    def search_builder(self) -> SearchQuery:
        """
        Create a new search query builder.

        :return: A SearchQuery instance for building the search query
        """
        return SearchQuery(self)

    def search(self, query_string: str, media_type: MediaType = "images", **kwargs) -> APIResponse:
        """
        Search for images or videos using the Pixabay API.

        :param query_string: Search query string
        :param media_type: Type of media to search for
        :param kwargs: Additional search parameters
        :return: API response data
        :raises NoResultsError: If the search matched nothing
        :raises PixabayAPIError: If the API request fails
        """
        # Explicit "key" and "q" win over same-named entries in kwargs.
        params = {**kwargs, "key": self.api_key, "q": query_string}
        url = f"{self.config.BASE_URL}{'videos/' if media_type == 'videos' else ''}"
        return self._make_api_request(url, params)

    def download(self, item_data: ImageResponse | VideoResponse, save_dir: str) -> Path:
        """
        Download a media item from Pixabay.

        :param item_data: Media item data
        :param save_dir: Directory to save the downloaded file (created if missing)
        :return: Path to the downloaded file
        :raises DownloadError: If the HTTP download fails
        :raises ValueError: If the item has no usable download URL
        """
        save_dir_path = Path(save_dir)
        save_dir_path.mkdir(parents=True, exist_ok=True)
        # Video hits are recognised by their "videos" rendition map. Testing
        # for type == "photo" would misclassify every other image type.
        media_type: MediaType = "videos" if "videos" in item_data else "images"
        download_url = self._get_download_url(item_data, media_type)
        # TODO: name the downloaded file after the item's {id}.
        filename = Path(urlparse(download_url).path).name or str(item_data.get("id", "download"))
        save_path = save_dir_path / filename
        # Stream into a sibling ".part" file first so that a failed transfer
        # neither leaves a truncated file nor clobbers an earlier download.
        partial_path = save_path.with_name(f"{save_path.name}.part")
        try:
            with self.session.get(download_url, stream=True, timeout=self._DOWNLOAD_TIMEOUT) as response:
                response.raise_for_status()
                with open(partial_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
        except RequestException as e:
            partial_path.unlink(missing_ok=True)
            raise DownloadError(f"Failed to download {media_type}: {e}") from e
        partial_path.replace(save_path)
        return save_path

    def _get_download_url(self, item_data: ImageResponse | VideoResponse, media_type: MediaType) -> str:
        """
        Get the appropriate download URL based on media type.

        For videos, when the preferred size has no URL the next smaller
        renditions are tried in turn.

        :raises ValueError: If no usable URL exists or the media type is unknown
        """
        if media_type == "images":
            sizes: dict[DownloadSize, str] = {
                "large": "largeImageURL",
                "medium": "webformatURL",
                "small": "previewURL",
                "tiny": "previewURL",
            }
            url = item_data.get(sizes[self.download_size])
            if url and isinstance(url, str):
                return url
            raise ValueError(f"No valid image URL found for size: {self.download_size}")
        if media_type == "videos":
            renditions = item_data.get("videos") or {}
            if self.download_size in self._SIZE_ORDER:
                preferred = self._SIZE_ORDER.index(self.download_size)
                candidates = self._SIZE_ORDER[preferred:]
            else:
                candidates = (self.download_size,)
            for size in candidates:
                url = (renditions.get(size) or {}).get("url")
                if url and isinstance(url, str):
                    if size != self.download_size:
                        logger.debug(f"Video size {self.download_size} unavailable; using {size}")
                    return url
            raise ValueError(f"No valid video URL found for size: {self.download_size}")
        raise ValueError(f"Invalid media type: {media_type}")

    def _make_api_request(self, url: str, params: dict[str, Any]) -> APIResponse:
        """
        Make an API request with retry logic.

        :raises NoResultsError: If the response reports zero hits
        :raises PixabayAPIError: If the request still fails after the retries
            or the body is not valid JSON
        """
        try:
            raw_data = self._fetch_json(url, params)
        except (RequestException, ValueError) as e:
            raise PixabayAPIError(f"API request failed: {e}") from e
        # Convert the raw data to APIResponse format
        api_response: APIResponse = {
            "total": raw_data.get("total", 0),
            "totalHits": raw_data.get("totalHits", 0),
            "hits": [],
        }
        for hit in raw_data.get("hits", []):
            # Classify by structure so hits of every type are kept.
            if "videos" in hit:
                api_response["hits"].append(VideoResponse(**hit))
            else:
                api_response["hits"].append(ImageResponse(**hit))
        if api_response["totalHits"] > 0:
            return api_response
        # Raised outside the retried helper: an empty result is not retried.
        raise NoResultsError("No results found for the given query.")

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), reraise=True)
    def _fetch_json(self, url: str, params: dict[str, Any]) -> dict[str, Any]:
        """
        Perform one GET request and decode its JSON body.

        Retried up to three attempts; ``reraise=True`` makes the last real
        exception surface instead of tenacity's ``RetryError``.
        """
        response = self.session.get(url, params=params, timeout=10)
        response.raise_for_status()
        self._log_rate_limit_info(response.headers)
        return response.json()

    def _log_rate_limit_info(self, headers: CaseInsensitiveDict[str]) -> None:
        """Log rate limit information from response headers."""
        logger.info("Rate Limit Info:")
        logger.info(f"Limit: {headers.get('X-RateLimit-Limit')}")
        logger.info(f"Remaining: {headers.get('X-RateLimit-Remaining')}")
        logger.info(f"Reset: {headers.get('X-RateLimit-Reset')} seconds")
if __name__ == "__main__":
    # Show the client's INFO messages (rate limits, saved files) when the
    # module is run as a script.
    logging.basicConfig(level=logging.INFO)
    try:
        # Initialize the API client inside the try block so that a missing
        # API key is reported like any other Pixabay error.
        api = PixabayAPI()
        # Search for nature images and download them using method chaining
        downloaded_images = (
            api.search_builder()
            .with_query("beautiful nature")
            .with_language(Language.EN)
            .with_category(Category.NATURE)
            .with_safesearch(True)
            .with_per_page(20)
            .for_images()
            .with_orientation("horizontal")
            .with_image_type("photo")
            .with_color(Color.GREEN)
            .download_to("images", max_items=10)
            .execute()
        )
        print(f"Downloaded {len(downloaded_images)} images")
        # Search for video content and download
        downloaded_videos = (
            api.search_builder()
            .with_query("timelapse nature")
            .with_category(Category.NATURE)
            .for_videos()
            .with_video_type("film")
            .download_to("videos", max_items=5)
            .execute()
        )
        print(f"Downloaded {len(downloaded_videos)} videos")
    except PixabayAPIError as e:
        logger.exception(f"Search or download failed: {e}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment