| description |
|---|
Comprehensive Python coding standards, best practices, and guidelines for writing clean, maintainable, and efficient Python code |
This guide collects coding standards, best practices, and guidelines for writing clean, maintainable, and efficient Python code, drawn from industry standards and community conventions.
Follow the Python Enhancement Proposal 8 (PEP 8) style guide for consistent code formatting.
# Variables and functions: snake_case
user_name = "john_doe"


def calculate_total_price():
    """Placeholder that illustrates a snake_case function name."""


# Constants: UPPER_SNAKE_CASE
MAX_RETRY_ATTEMPTS = 3
DEFAULT_TIMEOUT = 30


# Classes: PascalCase
class UserAccount:
    """Placeholder that illustrates a PascalCase class name."""
# Private attributes/methods: leading underscore
class BankAccount:
    """Illustrates the single- and double-underscore attribute conventions."""

    def __init__(self):
        # Single underscore: "protected" by convention only.
        self._balance = 0
        # Double underscore: "private"; Python name-mangles it to
        # _BankAccount__account_number.
        self.__account_number = None
# Modules: snake_case
# user_authentication.py
# payment_processor.py
# 1. Standard library imports
import os
import sys
from datetime import datetime, timedelta
from typing import List, Dict, Optional
# 2. Third-party imports
import requests
import pandas as pd
from flask import Flask, request
# 3. Local application imports
from .models import User
from .utils import validate_email
from ..config import settings# Maximum 79 characters per line
# Use parentheses for line continuation
result = some_function(
    argument_one,
    argument_two,
    argument_three,
    argument_four,
)
# Dictionary formatting
# One key per line with a trailing comma keeps future diffs to a single line.
user_data = {
    'name': 'John Doe',
    # The published value was an e-mail-obfuscation placeholder, not an address.
    'email': 'john.doe@example.com',
    'age': 30,
    'is_active': True,
}
# List comprehensions
# Keep active adult users; `users` is expected to be defined by the caller.
filtered_users = [
    user for user in users
    if user.is_active and user.age >= 18
]

# The import below opens the next example; it was fused onto the closing
# bracket above, which is a syntax error.
from typing import List, Dict, Optional, Union, Callable, Any
from dataclasses import dataclass
def process_user_data(
    user_id: int,
    email: str,
    preferences: Optional[Dict[str, Any]] = None,
) -> Dict[str, Union[str, int, bool]]:
    """Build a small summary record for a user.

    Args:
        user_id: Identifier stored under the ``'id'`` key.
        email: Address stored under the ``'email'`` key.
        preferences: Optional preference mapping; ``None`` is treated the
            same as an empty mapping.

    Returns:
        A dict with ``'id'``, ``'email'`` and ``'has_preferences'`` (``True``
        only when ``preferences`` is non-empty).
    """
    summary: Dict[str, Union[str, int, bool]] = {'id': user_id, 'email': email}
    summary['has_preferences'] = bool(preferences or {})
    return summary
# Generic types
# TypeVar is not part of the typing import shown for this example, so bring
# it into scope here; without it `T = TypeVar('T')` raises NameError.
from typing import TypeVar

T = TypeVar('T')


def get_first_item(items: List[T]) -> Optional[T]:
    """Return the first item from a list, or None if the list is empty.

    Args:
        items: List to inspect; it is not modified.

    Returns:
        ``items[0]`` when ``items`` is non-empty, otherwise ``None``.
    """
    return items[0] if items else None
# Dataclasses with type hints
# `field` and `datetime` are used below but were not imported by this example.
from dataclasses import dataclass, field
from datetime import datetime


@dataclass
class User:
    """User record; ``created_at`` defaults to the time of instantiation."""

    id: int
    name: str
    email: str
    is_active: bool = True
    # default_factory is called per instance, so every user gets its own timestamp.
    created_at: datetime = field(default_factory=datetime.now)


def calculate_compound_interest(
    principal: float,
    rate: float,
    time: float,
    n: int = 1
) -> float:
    """
    Calculate compound interest.

    Args:
        principal: The initial amount of money
        rate: Annual interest rate (as a decimal, e.g., 0.05 for 5%)
        time: Time period in years
        n: Number of times interest is compounded per year (default: 1);
            must be greater than zero

    Returns:
        The final amount after compound interest

    Raises:
        ValueError: If any parameter is negative, or if ``n`` is zero

    Example:
        >>> round(calculate_compound_interest(1000, 0.05, 2, 4), 2)
        1104.49
    """
    if any(param < 0 for param in [principal, rate, time, n]):
        raise ValueError("All parameters must be non-negative")
    # n divides the rate below; reject zero explicitly instead of letting a
    # ZeroDivisionError escape from the formula.
    if n == 0:
        raise ValueError("n must be greater than zero")
    return principal * (1 + rate / n) ** (n * time)
class DatabaseConnection:
    """
    Manages database connections with automatic cleanup.

    This class provides a context manager for database connections,
    ensuring proper resource cleanup even if exceptions occur.

    Attributes:
        host: Database host address
        port: Database port number
        database: Database name

    Example:
        with DatabaseConnection('localhost', 5432, 'mydb') as conn:
            result = conn.execute("SELECT * FROM users")

    NOTE(review): the code that opens the connection and the ``execute``
    method used in the example are not part of this excerpt.
    """

    def __init__(self, host: str, port: int, database: str):
        self.host = host
        self.port = port
        self.database = database
        # No connection is opened at construction time.
        self._connection = None

    def __enter__(self) -> "DatabaseConnection":
        """Enter the ``with`` block and return this object."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Release the underlying connection, if one was set.

        Returns ``None`` so an exception raised inside the ``with`` block
        propagates to the caller.
        """
        connection, self._connection = self._connection, None
        if connection is not None:
            # Assumes the stored connection object exposes close() -- TODO confirm.
            connection.close()


# Use specific exception types
# `json` and `logger` are used below but were not defined by this example.
import json
import logging

logger = logging.getLogger(__name__)


class ValidationError(Exception):
    """Raised when data validation fails."""


class DatabaseError(Exception):
    """Raised when database operations fail."""


# Proper exception handling
def process_user_input(user_input: str) -> Dict[str, Any]:
    """Parse and validate a JSON object supplied by a user.

    Args:
        user_input: Raw JSON text; must decode to an object that contains
            the ``name`` and ``email`` keys.

    Returns:
        The decoded JSON object.

    Raises:
        ValidationError: If the input is blank, is not valid JSON, is not a
            JSON object, or lacks a required field.
        DatabaseError: If any other unexpected error occurs; the original
            exception is attached as ``__cause__``.
    """
    try:
        # Validate input
        if not user_input.strip():
            raise ValidationError("Input cannot be empty")

        # Process data
        data = json.loads(user_input)
        # Arrays, numbers and strings are valid JSON but not valid records;
        # a list containing "name" and "email" would otherwise pass the
        # membership test below.
        if not isinstance(data, dict):
            raise ValidationError("Input must be a JSON object")

        # Validate required fields
        required_fields = ['name', 'email']
        missing_fields = [name for name in required_fields if name not in data]
        if missing_fields:
            raise ValidationError(f"Missing required fields: {missing_fields}")
        return data
    except json.JSONDecodeError as e:
        # Chain the cause so the decoder's position information is kept.
        raise ValidationError(f"Invalid JSON format: {e}") from e
    except ValidationError:
        raise  # Re-raise validation errors
    except Exception as e:
        # Log unexpected errors together with the traceback.
        logger.exception("Unexpected error processing user input: %s", e)
        raise DatabaseError("Failed to process user input") from e
# Context managers for resource management
from contextlib import contextmanager


@contextmanager
def database_transaction():
    """Context manager for database transactions.

    Yields the connection returned by ``get_database_connection()``. The
    transaction is committed when the ``with`` body finishes normally and
    rolled back (then re-raised) when it raises. The connection is closed
    in every case.
    """
    conn = get_database_connection()
    try:
        # begin() sits inside the try so the connection is still closed when
        # starting the transaction itself fails.
        trans = conn.begin()
        try:
            yield conn
            trans.commit()
        except Exception:
            trans.rollback()
            raise
    finally:
        conn.close()
# Usage
def create_user_example():
    """Insert a user inside a transaction and report failure as a dict.

    The statements are wrapped in a function because the ``return`` in the
    error handler is a syntax error at module level.

    Returns:
        ``{"error": "Failed to create user"}`` when a ``DatabaseError`` is
        raised, otherwise ``None``.
    """
    try:
        with database_transaction() as conn:
            conn.execute("INSERT INTO users (name) VALUES (?)", ("John",))
    except DatabaseError as e:
        logger.error(f"Database operation failed: {e}")
        return {"error": "Failed to create user"}
    return None


from collections import defaultdict, deque, Counter, namedtuple
from functools import lru_cache
import bisect
# Use appropriate data structures
# defaultdict for grouping
def group_users_by_department(users):
    """Group users by their ``department`` attribute.

    Returns a plain dict mapping each department to the list of users in
    it, in the order they were encountered.
    """
    grouped = defaultdict(list)
    for member in users:
        bucket = grouped[member.department]
        bucket.append(member)
    return dict(grouped)
# deque for efficient queue operations
class TaskQueue:
    """First-in, first-out task queue backed by a ``deque``."""

    def __init__(self):
        self._queue = deque()

    def add_task(self, task):
        """Append ``task`` to the back of the queue."""
        self._queue.append(task)

    def get_next_task(self):
        """Remove and return the oldest task, or ``None`` if the queue is empty."""
        if not self._queue:
            return None
        return self._queue.popleft()
# Counter for frequency analysis
def analyze_word_frequency(text: str) -> Dict[str, int]:
    """Count how often each whitespace-separated word occurs, ignoring case."""
    tally = Counter()
    tally.update(text.lower().split())
    return dict(tally)
# namedtuple for immutable data structures
# Field names given as one space-separated string; equivalent to a list of names.
Point = namedtuple('Point', 'x y')
User = namedtuple('User', 'id name email')
# Use generators for memory efficiency
def read_large_file(filename: str):
    """Lazily yield each line of ``filename`` with surrounding whitespace stripped.

    The file is read one line at a time, so it is never held in memory as
    a whole.
    """
    with open(filename, 'r') as handle:
        yield from (raw_line.strip() for raw_line in handle)
# Efficient sorting and searching
def binary_search_insert(sorted_list: List[int], value: int) -> int:
    """Return the leftmost index at which ``value`` can be inserted.

    ``sorted_list`` must already be sorted; inserting at the returned index
    keeps it sorted, and equal elements end up after the new value.
    """
    insertion_index = bisect.bisect_left(sorted_list, value, 0, len(sorted_list))
    return insertion_index
# Memoization for expensive computations
@lru_cache(maxsize=128)
def fibonacci(n: int) -> int:
    """Calculate the n-th Fibonacci number with memoization.

    ``lru_cache`` stores up to 128 results so repeated calls are answered
    from the cache. Each miss is computed with a loop rather than by
    recursion, so a large ``n`` on a cold cache cannot exceed the
    interpreter's recursion limit.

    Args:
        n: Index into the sequence (``fibonacci(0) == 0``,
            ``fibonacci(1) == 1``). Values below 2, including negatives,
            are returned unchanged.

    Returns:
        The n-th Fibonacci number.
    """
    if n < 2:
        return n
    previous, current = 0, 1
    for _ in range(n - 1):
        previous, current = current, previous + current
    return current


from abc import ABC, abstractmethod
from typing import Protocol
# Abstract base classes
class PaymentProcessor(ABC):
    """Interface that every payment processor must implement."""

    @abstractmethod
    def process_payment(self, amount: float, currency: str) -> bool:
        """Process a payment transaction."""

    @abstractmethod
    def refund_payment(self, transaction_id: str) -> bool:
        """Refund a payment transaction."""


class CreditCardProcessor(PaymentProcessor):
    """Credit-card implementation of ``PaymentProcessor`` (stubbed)."""

    def __init__(self, api_key: str):
        self.api_key = api_key

    def process_payment(self, amount: float, currency: str) -> bool:
        # Implementation for credit card processing
        return True

    def refund_payment(self, transaction_id: str) -> bool:
        # Implementation for credit card refund
        return True
# Protocols for duck typing
class Drawable(Protocol):
    """Structural type: anything that has a ``draw()`` method."""

    def draw(self) -> None: ...


class Circle:
    """Circle that satisfies ``Drawable`` without inheriting from it."""

    def __init__(self, radius: float):
        self.radius = radius

    def draw(self) -> None:
        print(f"Drawing circle with radius {self.radius}")


def render_shape(shape: Drawable) -> None:
    """Render any drawable shape by calling its ``draw()`` method."""
    shape.draw()
# Property decorators
class Temperature:
    """Temperature stored in Celsius with a derived Fahrenheit view.

    Every assignment, including the one made by the constructor, is checked
    against absolute zero.
    """

    def __init__(self, celsius: float = 0):
        # Assign through the property so the constructor cannot bypass the
        # absolute-zero check enforced by the setter.
        self.celsius = celsius

    @property
    def celsius(self) -> float:
        """Temperature in degrees Celsius."""
        return self._celsius

    @celsius.setter
    def celsius(self, value: float) -> None:
        if value < -273.15:
            raise ValueError("Temperature cannot be below absolute zero")
        self._celsius = value

    @property
    def fahrenheit(self) -> float:
        """Temperature in degrees Fahrenheit, derived from the Celsius value."""
        return (self._celsius * 9/5) + 32

    @fahrenheit.setter
    def fahrenheit(self, value: float) -> None:
        # Convert, then reuse the validating Celsius setter.
        self.celsius = (value - 32) * 5/9
# Composition over inheritance
class Engine:
    """Engine component with a fixed horsepower rating."""

    def __init__(self, horsepower: int):
        self.horsepower = horsepower

    def start(self) -> None:
        print("Engine started")


class Car:
    """Car that owns an ``Engine`` instead of inheriting from it."""

    def __init__(self, make: str, model: str, engine: Engine):
        self.make, self.model = make, model
        self.engine = engine

    def start(self) -> None:
        """Announce the car, then delegate to the engine."""
        print(f"Starting {self.make} {self.model}")
        self.engine.start()


import pytest
from unittest.mock import Mock, patch
from myapp.models import User
from myapp.services import UserService
class TestUserService:
    """Unit tests for ``UserService`` in arrange / act / assert layout.

    NOTE(review): ``ValidationError`` and ``DatabaseError`` are referenced
    below but are not imported in this excerpt -- confirm which module
    defines them and import them next to ``User`` and ``UserService``.
    """

    @pytest.fixture
    def user_service(self):
        return UserService()

    @pytest.fixture
    def sample_user(self):
        return User(id=1, name="John Doe", email="john.doe@example.com")

    def test_create_user_success(self, user_service, sample_user):
        # Arrange
        user_data = {
            "name": "John Doe",
            "email": "john.doe@example.com"
        }

        # Act
        result = user_service.create_user(user_data)

        # Assert
        assert result.name == "John Doe"
        assert result.email == "john.doe@example.com"
        assert result.id is not None

    def test_create_user_invalid_email(self, user_service):
        # Arrange
        user_data = {
            "name": "John Doe",
            "email": "invalid-email"
        }

        # Act & Assert
        with pytest.raises(ValidationError) as exc_info:
            user_service.create_user(user_data)
        # Must sit outside the `with` block: code after the raising call
        # inside the block never runs.
        assert "Invalid email format" in str(exc_info.value)

    @patch('myapp.services.database')
    def test_get_user_database_error(self, mock_db, user_service):
        # Arrange
        mock_db.get_user.side_effect = DatabaseError("Connection failed")

        # Act & Assert
        with pytest.raises(DatabaseError):
            user_service.get_user(1)

    @pytest.mark.parametrize("user_id,expected", [
        (1, True),
        (999, False),
        (0, False),
        (-1, False)
    ])
    def test_user_exists(self, user_service, user_id, expected):
        # Mock the database response
        with patch.object(user_service, '_check_user_exists') as mock_check:
            mock_check.return_value = expected
            result = user_service.user_exists(user_id)
        assert result == expected
# Property-based testing with Hypothesis
from hypothesis import given, strategies as st


@given(st.integers(min_value=0, max_value=100))
def test_calculate_discount_percentage(percentage):
    """Test discount calculation with various percentages."""
    original_price = 100.0
    result = calculate_discount(original_price, percentage)

    # Properties that should always hold
    assert 0 <= result <= original_price
    # Compare with a tolerance: an algebraically equal formula inside
    # calculate_discount may differ from this one in the last float bit.
    assert result == pytest.approx(original_price * (1 - percentage / 100))


@given(st.text(min_size=1), st.emails())
def test_user_creation_properties(name, email):
    """Test user creation with random valid inputs."""
    user = User(name=name, email=email)
    assert user.name == name
    assert user.email == email
    assert user.created_at is not None


import cProfile
import time
from functools import wraps
from typing import Callable, Any
# Performance measurement decorator
def measure_time(func: Callable) -> Callable:
    """Decorate ``func`` so each successful call prints its wall-clock duration.

    The wrapper forwards all arguments, returns the wrapped function's
    result unchanged, and keeps its metadata through ``functools.wraps``.
    """

    @wraps(func)
    def wrapper(*args, **kwargs) -> Any:
        started = time.perf_counter()
        outcome = func(*args, **kwargs)
        finished = time.perf_counter()
        print(f"{func.__name__} took {finished - started:.4f} seconds")
        return outcome

    return wrapper
# Memory-efficient data processing
# `defaultdict` and `Dict` are used below but were not imported by this example.
from collections import defaultdict
from typing import Dict


def process_large_dataset(filename: str, chunk_size: int = 1024) -> Dict[str, int]:
    """Count case-insensitive word frequencies in a file, reading it in chunks.

    Args:
        filename: Path of the text file to read.
        chunk_size: Number of characters requested per read (default 1024).

    Returns:
        Mapping of lower-cased word to its number of occurrences.
    """
    result = defaultdict(int)
    # Trailing fragment of the previous chunk that may be the start of a
    # word continuing in the next chunk.
    carry = ""
    with open(filename, 'r') as file:
        # iter() with a sentinel keeps calling read() until it returns ''.
        for chunk in iter(lambda: file.read(chunk_size), ''):
            text = carry + chunk
            words = text.split()
            # If the text does not end in whitespace, its last word may be
            # cut off by the chunk boundary; hold it back instead of
            # counting the two halves as separate words.
            if words and not text[-1].isspace():
                carry = words.pop()
            else:
                carry = ""
            for word in words:
                result[word.lower()] += 1
    if carry:
        result[carry.lower()] += 1
    return dict(result)
# Efficient algorithms
def merge_sorted_lists(list1: List[int], list2: List[int]) -> List[int]:
    """Merge two sorted lists into one new sorted list.

    Neither input is modified. On ties the element from ``list1`` is
    emitted first.
    """
    merged = []
    left, right = 0, 0
    left_len, right_len = len(list1), len(list2)
    while left < left_len and right < right_len:
        a, b = list1[left], list2[right]
        if a <= b:
            merged.append(a)
            left += 1
        else:
            merged.append(b)
            right += 1
    # At most one of these tails is non-empty.
    merged += list1[left:]
    merged += list2[right:]
    return merged
# Use appropriate data structures for performance
from collections import OrderedDict, deque


class LRUCache:
    """Least Recently Used cache implementation.

    Entries live in an ``OrderedDict`` ordered from least to most recently
    used, so lookups, updates and evictions are all O(1).

    Attributes:
        capacity: Maximum number of entries kept.
        cache: The ordered key/value store.
    """

    def __init__(self, capacity: int):
        self.capacity = capacity
        self.cache = OrderedDict()

    @property
    def order(self) -> deque:
        """Snapshot of the keys from least to most recently used."""
        return deque(self.cache)

    def get(self, key: str) -> Any:
        """Return the value for ``key`` and mark it most recently used.

        Returns ``None`` when ``key`` is not cached.
        """
        if key not in self.cache:
            return None
        self.cache.move_to_end(key)
        return self.cache[key]

    def put(self, key: str, value: Any) -> None:
        """Insert or update ``key``, evicting the least recently used entry if full."""
        if self.capacity <= 0:
            # Nothing can be stored; without this guard an eviction would be
            # attempted on an empty cache.
            return
        if key in self.cache:
            self.cache.move_to_end(key)
        elif len(self.cache) >= self.capacity:
            # Remove least recently used
            self.cache.popitem(last=False)
        self.cache[key] = value


import re
import hashlib
import secrets
from html import escape
def validate_email(email: str) -> bool:
    """Validate email format using regex.

    Args:
        email: Candidate address.

    Returns:
        ``True`` only when the whole string matches the pattern.
    """
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    # fullmatch rather than match: `$` also matches just before a trailing
    # newline, so match() would accept "user@example.com\n".
    return re.fullmatch(pattern, email) is not None
def sanitize_user_input(user_input: str) -> str:
    """Escape HTML-significant characters in user input to prevent XSS.

    ``html.escape`` replaces ``&``, ``<``, ``>``, ``"`` and ``'`` with
    entities, so none of those characters remain afterwards. A second
    regex pass that deleted them could never match and has been dropped.

    Args:
        user_input: Untrusted text.

    Returns:
        The escaped text with leading and trailing whitespace removed.
    """
    return escape(user_input).strip()
# Shared by hashing and verification so the two can never drift apart.
_PBKDF2_ITERATIONS = 100000


def _derive_key(password: str, salt: str) -> bytes:
    """Derive the PBKDF2-HMAC-SHA256 key for ``password`` and ``salt``."""
    return hashlib.pbkdf2_hmac('sha256',
                               password.encode('utf-8'),
                               salt.encode('utf-8'),
                               _PBKDF2_ITERATIONS)


def hash_password(password: str) -> str:
    """Hash password using a secure algorithm.

    Args:
        password: Plain-text password.

    Returns:
        ``"<salt>:<hash>"`` where the salt is 32 random hex characters and
        the hash is the hex-encoded derived key.
    """
    # Generate a random salt
    salt = secrets.token_hex(16)
    return f"{salt}:{_derive_key(password, salt).hex()}"


def verify_password(password: str, stored_hash: str) -> bool:
    """Verify password against stored hash.

    Args:
        password: Plain-text password to check.
        stored_hash: Value previously produced by ``hash_password``.

    Returns:
        ``True`` when the password matches; ``False`` when it does not or
        when ``stored_hash`` is not in ``"<salt>:<hash>"`` form.
    """
    try:
        salt, hash_hex = stored_hash.split(':')
        expected_hex = _derive_key(password, salt).hex()
        # Constant-time comparison: `==` stops at the first differing
        # character, which leaks timing information.
        return secrets.compare_digest(expected_hex.encode('ascii'),
                                      hash_hex.encode('utf-8'))
    except ValueError:
        return False
# SQL injection prevention
def safe_database_query(connection, user_id: int) -> List[Dict]:
    """Fetch all ``users`` rows with the given id using a bound parameter.

    The id is passed separately from the SQL text, so it is never
    interpolated into the statement.
    """
    # BAD: f"SELECT * FROM users WHERE id = {user_id}"
    # GOOD: Use parameterized queries
    query = "SELECT * FROM users WHERE id = %s"
    parameters = (user_id,)
    cursor = connection.cursor()
    cursor.execute(query, parameters)
    rows = cursor.fetchall()
    return rows


import os
from pathlib import Path
from typing import Optional
from dataclasses import dataclass
@dataclass
class AppConfig:
    """Application configuration with environment variables.

    Required fields are declared first: a dataclass field without a
    default cannot follow one that has a default, and doing so raises
    ``TypeError`` when the class is created.
    """

    # Required settings
    database_url: str
    secret_key: str
    # Database settings
    database_pool_size: int = 10
    # Security settings
    jwt_expiration_hours: int = 24
    # External APIs
    redis_url: Optional[str] = None
    email_api_key: Optional[str] = None
    # Application settings
    debug: bool = False
    log_level: str = "INFO"

    @classmethod
    def from_environment(cls) -> 'AppConfig':
        """Create configuration from environment variables.

        ``DEBUG`` and ``LOG_LEVEL`` fall back to the ``debug`` and
        ``log_level`` values defined on ``cls``, so a subclass that
        overrides them is honoured when the variable is unset.

        Raises:
            ValueError: If ``DB_POOL_SIZE`` or ``JWT_EXPIRATION_HOURS`` is
                set to something that is not an integer.
        """
        env = os.environ
        debug_flag = env.get('DEBUG')
        return cls(
            database_url=env.get('DATABASE_URL', 'sqlite:///app.db'),
            database_pool_size=int(env.get('DB_POOL_SIZE', '10')),
            secret_key=env.get('SECRET_KEY', ''),
            jwt_expiration_hours=int(env.get('JWT_EXPIRATION_HOURS', '24')),
            redis_url=env.get('REDIS_URL'),
            email_api_key=env.get('EMAIL_API_KEY'),
            debug=cls.debug if debug_flag is None else debug_flag.lower() == 'true',
            log_level=env.get('LOG_LEVEL', cls.log_level).upper(),
        )

    def validate(self) -> None:
        """Validate configuration settings.

        Raises:
            ValueError: If the secret key is empty or shorter than 32
                characters, or if the pool size is not positive.
        """
        if not self.secret_key:
            raise ValueError("SECRET_KEY environment variable is required")
        if len(self.secret_key) < 32:
            raise ValueError("SECRET_KEY must be at least 32 characters long")
        if self.database_pool_size <= 0:
            raise ValueError("Database pool size must be positive")
# Environment-specific settings
# Each subclass needs its own @dataclass decorator: without it the annotated
# overrides are plain class attributes and the inherited __init__ keeps
# using the base-class defaults.
@dataclass
class DevelopmentConfig(AppConfig):
    """Configuration defaults for local development."""

    debug: bool = True
    log_level: str = "DEBUG"


@dataclass
class ProductionConfig(AppConfig):
    """Configuration defaults for production."""

    debug: bool = False
    log_level: str = "WARNING"
def get_config() -> AppConfig:
    """Build and validate the configuration for the current environment.

    ``ENVIRONMENT`` selects the class: ``'production'`` uses
    ``ProductionConfig``; any other value, or none, uses
    ``DevelopmentConfig``.
    """
    environment = os.environ.get('ENVIRONMENT', 'development')
    config_class = ProductionConfig if environment == 'production' else DevelopmentConfig
    settings = config_class.from_environment()
    settings.validate()
    return settings


import logging
import json
import sys
from datetime import datetime
from typing import Dict, Any
# `timezone` is needed for the UTC timestamp below.
from datetime import datetime, timezone


class JSONFormatter(logging.Formatter):
    """Custom JSON formatter for structured logging."""

    # Attribute names present on every LogRecord, taken from a blank record
    # so the set matches the running Python version, plus the two names a
    # standard Formatter may add. Anything else on a record came from `extra`.
    _RESERVED_ATTRS = frozenset(
        logging.LogRecord('', 0, '', 0, '', (), None).__dict__
    ) | {'message', 'asctime'}

    def format(self, record: logging.LogRecord) -> str:
        """Serialise ``record`` as a single-line JSON object.

        The timestamp is the record's creation time in UTC (ISO 8601 with a
        ``+00:00`` offset). Fields passed through ``extra`` are added at the
        top level, and values JSON cannot encode are written as ``str(value)``.
        """
        created = datetime.fromtimestamp(record.created, tz=timezone.utc)
        log_entry = {
            'timestamp': created.isoformat(),
            'level': record.levelname,
            'logger': record.name,
            'message': record.getMessage(),
            'module': record.module,
            'function': record.funcName,
            'line': record.lineno
        }

        # Add exception info if present
        if record.exc_info:
            log_entry['exception'] = self.formatException(record.exc_info)

        # Add extra fields
        for key, value in record.__dict__.items():
            if key not in self._RESERVED_ATTRS:
                log_entry[key] = value

        # default=str keeps one unserialisable extra from failing the whole entry.
        return json.dumps(log_entry, default=str)
def setup_logging(log_level: str = "INFO") -> None:
    """Configure root logging with JSON output to stdout and ``app.log``.

    Args:
        log_level: Name of a ``logging`` level such as ``"INFO"``; case is
            ignored.
    """
    json_formatter = JSONFormatter()

    # Console first, then file -- both share the one formatter.
    handlers = [
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('app.log'),
    ]
    for handler in handlers:
        handler.setFormatter(json_formatter)

    # Root logger configuration
    level = getattr(logging, log_level.upper())
    logging.basicConfig(level=level, handlers=handlers)
# Context manager for request tracking
from contextvars import ContextVar
import uuid

# default='' makes request_id.get() return an empty string outside a tracked request.
request_id: ContextVar[str] = ContextVar('request_id', default='')


class RequestTracker:
    """Track requests with unique IDs for better debugging.

    Entering the context assigns a fresh UUID to ``request_id``. Leaving it
    restores whatever value was current before, so nested trackers do not
    erase the outer request's ID.
    """

    def __init__(self) -> None:
        # One token per active __enter__, so a single instance can be nested.
        self._tokens = []

    def __enter__(self):
        self._tokens.append(request_id.set(str(uuid.uuid4())))
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # reset() puts back the previous value rather than overwriting it with ''.
        request_id.reset(self._tokens.pop())
# Usage in Flask/FastAPI
def log_with_context(message: str, **kwargs):
    """Log ``message`` at INFO level together with the current request ID.

    The request ID (an empty string when none is set) and all keyword
    arguments are passed as ``extra`` fields on the log record. A keyword
    named ``request_id`` overrides the tracked value.
    """
    context = {'request_id': request_id.get('')}
    context.update(kwargs)
    logging.getLogger(__name__).info(message, extra=context)
# Example usage
with RequestTracker():
    log_with_context("Processing user request", user_id=123, action="login")

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-added-large-files
  - repo: https://github.com/psf/black
    rev: 23.1.0
    hooks:
      - id: black
        language_version: python3
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
  - repo: https://github.com/pycqa/flake8
    rev: 6.0.0
    hooks:
      - id: flake8
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.0.1
    hooks:
      - id: mypy

#!/usr/bin/env python3
"""Setup script for development environment."""
import subprocess
import sys
from pathlib import Path
def run_command(command: list) -> None:
    """Run a command and exit the process with status 1 if it fails.

    Args:
        command: Program and arguments as a list; no shell is involved.

    Raises:
        SystemExit: With code 1 when the command exits with a non-zero
            status or cannot be started (for example, the executable is
            not installed).
    """
    printable = ' '.join(command)
    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable program, which would
        # otherwise surface as an unhandled traceback.
        print(f"✗ Failed to run: {printable}")
        print(f"  {e}", file=sys.stderr)
        sys.exit(1)
    print(f"✓ {printable}")
def setup_development_environment():
    """Install dependencies and run every quality gate once, in order.

    Each step goes through ``run_command``, so the first failing step stops
    the setup.
    """
    print("Setting up Python development environment...")

    steps = (
        ["pip", "install", "-r", "requirements-dev.txt"],  # development dependencies
        ["pre-commit", "install"],                          # pre-commit hooks
        ["black", "src/"],                                  # initial code formatting
        ["isort", "src/"],
        ["flake8", "src/"],                                 # linting
        ["mypy", "src/"],                                   # type checking
        ["pytest", "tests/"],                               # tests
    )
    for step in steps:
        run_command(step)

    closing_lines = (
        "\n✓ Development environment setup complete!",
        "\nNext steps:",
        "1. Copy .env.example to .env and configure",
        "2. Run 'python -m src.main' to start the application",
    )
    for line in closing_lines:
        print(line)
if __name__ == "__main__":
    setup_development_environment()

This guide provides a solid foundation for writing high-quality Python code. Remember to:
- Follow PEP 8 for consistent code style
- Use type hints for better code documentation and IDE support
- Write comprehensive tests for all functionality
- Handle exceptions properly with specific exception types
- Document your code with clear docstrings
- Use appropriate data structures for performance
- Implement security best practices for user input and data handling
- Set up automated tools for code quality and consistency
Regularly review and update these practices as the Python ecosystem evolves and new best practices emerge.