@madhurprash
Created July 25, 2025 15:57
Script to download Microsoft Phi-4-mini-instruct model from Hugging Face and upload it to S3 for SageMaker use.
#!/usr/bin/env python3
"""
Script to download Microsoft Phi-4-mini-instruct model from Hugging Face
and upload it to S3 for SageMaker use.
"""
import os
import tempfile
import shutil
from pathlib import Path
import boto3
from botocore.exceptions import ClientError, NoCredentialsError
from huggingface_hub import snapshot_download
import logging
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def download_and_upload_model():
    # Configuration
    MODEL_NAME = "microsoft/Phi-4-mini-instruct"
    S3_PATH = "s3://"  # NOTE: bucket/prefix left blank in the gist; set this before running

    # Parse S3 path
    s3_parts = S3_PATH.replace("s3://", "").split("/", 1)
    bucket_name = s3_parts[0]
    s3_prefix = s3_parts[1] if len(s3_parts) > 1 else ""

    logger.info(f"Starting download of {MODEL_NAME}")
    logger.info(f"Target S3 location: {S3_PATH}")

    # Create temporary directory for model download
    with tempfile.TemporaryDirectory() as temp_dir:
        local_model_path = Path(temp_dir) / "model"
        try:
            # Download model from Hugging Face
            logger.info("Downloading model from Hugging Face...")
            snapshot_download(
                repo_id=MODEL_NAME,
                local_dir=local_model_path,
                local_dir_use_symlinks=False,  # deprecated in recent huggingface_hub releases (kept from the original)
                resume_download=True           # likewise deprecated; resuming is now the default behavior
            )
            logger.info(f"Model downloaded to: {local_model_path}")

            # Initialize S3 client
            try:
                s3_client = boto3.client('s3', region_name='us-west-2')
                logger.info("S3 client initialized successfully")
            except NoCredentialsError:
                logger.error("AWS credentials not found. Please configure your AWS credentials.")
                return False

            # Upload to S3
            logger.info("Starting upload to S3...")
            upload_success = upload_directory_to_s3(
                s3_client,
                local_model_path,
                bucket_name,
                s3_prefix
            )

            if upload_success:
                logger.info(f"Model successfully uploaded to {S3_PATH}")
                return True
            else:
                logger.error("Upload to S3 failed")
                return False

        except Exception as e:
            logger.error(f"Error during model download: {str(e)}")
            return False
def upload_directory_to_s3(s3_client, local_directory, bucket_name, s3_prefix):
    """
    Upload a directory to S3 recursively.
    """
    try:
        # Check if bucket exists
        try:
            s3_client.head_bucket(Bucket=bucket_name)
        except ClientError as e:
            if e.response['Error']['Code'] == '404':
                logger.error(f"Bucket {bucket_name} does not exist")
                return False
            else:
                logger.error(f"Error checking bucket: {str(e)}")
                return False

        # Walk through local directory and upload files
        for local_file in Path(local_directory).rglob('*'):
            if local_file.is_file():
                # Calculate relative path for S3 key
                relative_path = local_file.relative_to(local_directory)
                s3_key = str(Path(s3_prefix) / relative_path).replace('\\', '/')

                logger.info(f"Uploading {local_file.name}...")
                try:
                    s3_client.upload_file(
                        str(local_file),
                        bucket_name,
                        s3_key
                    )
                    logger.debug(f"Uploaded: {s3_key}")
                except ClientError as e:
                    logger.error(f"Failed to upload {local_file}: {str(e)}")
                    return False

        logger.info("All files uploaded successfully")
        return True

    except Exception as e:
        logger.error(f"Error during S3 upload: {str(e)}")
        return False
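# --- Optional tuning sketch (not part of the original gist) -----------------
# upload_file already uses boto3's managed multipart transfer. If multi-gigabyte
# model shards upload slowly, a TransferConfig can be passed in; the threshold
# and concurrency values below are illustrative assumptions, not settings from
# the original script:
#
#   from boto3.s3.transfer import TransferConfig
#   transfer_config = TransferConfig(
#       multipart_threshold=64 * 1024 * 1024,  # switch to multipart above 64 MB
#       max_concurrency=8,                     # parallel part uploads
#   )
#   s3_client.upload_file(str(local_file), bucket_name, s3_key,
#                         Config=transfer_config)
# -----------------------------------------------------------------------------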
def verify_upload(bucket_name, s3_prefix):
    """
    Verify that the model files were uploaded correctly.
    """
    try:
        s3_client = boto3.client('s3', region_name='us-west-2')
        logger.info("Verifying upload...")

        response = s3_client.list_objects_v2(
            Bucket=bucket_name,
            Prefix=s3_prefix,
            MaxKeys=10
        )

        if 'Contents' in response:
            logger.info(f"Found {len(response['Contents'])} files in S3")
            for obj in response['Contents'][:5]:  # Show first 5 files
                logger.info(f"  - {obj['Key']} ({obj['Size']} bytes)")
            if len(response['Contents']) > 5:
                logger.info(f"  ... and {len(response['Contents']) - 5} more files")
            return True
        else:
            logger.warning("No files found in S3 location")
            return False

    except Exception as e:
        logger.error(f"Error verifying upload: {str(e)}")
        return False
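# --- Optional verification sketch (not part of the original gist) -----------
# list_objects_v2 with MaxKeys=10 only samples the prefix. To count every
# uploaded object, a paginator could be used instead; bucket_name and s3_prefix
# are assumed to be the same values passed to verify_upload:
#
#   paginator = s3_client.get_paginator("list_objects_v2")
#   total_objects = sum(
#       len(page.get("Contents", []))
#       for page in paginator.paginate(Bucket=bucket_name, Prefix=s3_prefix)
#   )
#   logger.info(f"Total objects under prefix: {total_objects}")
# -----------------------------------------------------------------------------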
if __name__ == "__main__":
    logger.info("Starting Phi-4-mini-instruct model download and S3 upload process")

    # Check dependencies
    try:
        import boto3
        import huggingface_hub
    except ImportError as e:
        logger.error(f"Missing required dependency: {e}")
        logger.error("Please install: pip install boto3 huggingface_hub")
        exit(1)

    # Download and upload model
    success = download_and_upload_model()

    if success:
        # Verify the upload
        bucket_name = ""  # NOTE: left blank in the gist; set to the same bucket used in S3_PATH above
        s3_prefix = "models/microsoft/Phi-4-mini-instruct/"
        verify_upload(bucket_name, s3_prefix)
        logger.info("Process completed successfully!")
        logger.info("Model is now available at: s3://sagemaker-us-west-2-218208277580/models/microsoft/Phi-4-mini-instruct/")
    else:
        logger.error("Process failed. Please check the logs above for details.")
        exit(1)
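Once the artifacts are in S3, SageMaker can reference the uncompressed prefix directly via the CreateModel API's ModelDataSource field. The snippet below is a minimal sketch, not part of the gist: the model name, execution role ARN, container image URI, and bucket are placeholders you would substitute for your own values.

import boto3

sagemaker_client = boto3.client("sagemaker", region_name="us-west-2")

# All names below are placeholders, not values from the gist.
sagemaker_client.create_model(
    ModelName="phi-4-mini-instruct",  # hypothetical model name
    ExecutionRoleArn="arn:aws:iam::123456789012:role/SageMakerExecutionRole",  # placeholder role
    PrimaryContainer={
        "Image": "<inference-container-image-uri>",  # e.g. an LMI/TGI serving image for your region
        "ModelDataSource": {
            "S3DataSource": {
                # Uncompressed artifacts uploaded by this script; bucket is a placeholder.
                "S3Uri": "s3://<your-bucket>/models/microsoft/Phi-4-mini-instruct/",
                "S3DataType": "S3Prefix",
                "CompressionType": "None",
            }
        },
    },
)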