Created
July 25, 2025 15:57
-
-
Save madhurprash/1b084671492020bb4a4d9f214bb2e535 to your computer and use it in GitHub Desktop.
Script to download Microsoft Phi-4-mini-instruct model from Hugging Face and upload it to S3 for SageMaker use.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Script to download Microsoft Phi-4-mini-instruct model from Hugging Face | |
| and upload it to S3 for SageMaker use. | |
| """ | |
| import os | |
| import tempfile | |
| import shutil | |
| from pathlib import Path | |
| import boto3 | |
| from botocore.exceptions import ClientError, NoCredentialsError | |
| from huggingface_hub import snapshot_download | |
| import logging | |
| # Configure logging | |
| logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | |
| logger = logging.getLogger(__name__) | |
def download_and_upload_model(
    model_name="microsoft/Phi-4-mini-instruct",
    s3_path="s3://",
    region_name="us-west-2",
):
    """
    Download a Hugging Face model snapshot and upload it to S3.

    The snapshot is downloaded into a temporary directory that is removed
    when the function returns, then every file is uploaded with
    ``upload_directory_to_s3``.

    Args:
        model_name: Hugging Face repo id to download.
        s3_path: Destination in the form ``s3://<bucket>/<prefix>``. The
            default is an empty placeholder and must be overridden (or
            edited) before the upload can succeed.
        region_name: AWS region used to create the S3 client.

    Returns:
        True if the model was downloaded and fully uploaded, False
        otherwise. Failures are logged rather than raised.
    """
    # Parse S3 path: strip the scheme only when it is a real prefix, then
    # split "<bucket>/<prefix>" at the first slash.
    scheme = "s3://"
    location = s3_path[len(scheme):] if s3_path.startswith(scheme) else s3_path
    bucket_name, _, s3_prefix = location.partition("/")

    # Fail fast: without a bucket the upload cannot succeed, so do not
    # spend time downloading the whole model first.
    if not bucket_name:
        logger.error(
            f"Invalid S3 path {s3_path!r}: expected s3://<bucket>/<prefix>"
        )
        return False

    logger.info(f"Starting download of {model_name}")
    logger.info(f"Target S3 location: {s3_path}")

    # Create temporary directory for model download
    with tempfile.TemporaryDirectory() as temp_dir:
        local_model_path = Path(temp_dir) / "model"

        # Download model from Hugging Face. Only the download is inside
        # this try so the "model download" error message stays accurate.
        try:
            logger.info("Downloading model from Hugging Face...")
            # NOTE(review): newer huggingface_hub releases may deprecate
            # local_dir_use_symlinks / resume_download - confirm against the
            # installed version before removing them.
            snapshot_download(
                repo_id=model_name,
                local_dir=local_model_path,
                local_dir_use_symlinks=False,
                resume_download=True
            )
        except Exception as e:
            logger.error(f"Error during model download: {str(e)}")
            return False
        logger.info(f"Model downloaded to: {local_model_path}")

        # Initialize S3 client
        try:
            s3_client = boto3.client('s3', region_name=region_name)
            logger.info("S3 client initialized successfully")
        except NoCredentialsError:
            # NOTE(review): boto3 appears to resolve credentials on the
            # first request rather than at client creation, so this branch
            # may never fire - confirm.
            logger.error("AWS credentials not found. Please configure your AWS credentials.")
            return False
        except Exception as e:
            # Keep the original "never raises" contract for callers.
            logger.error(f"Error initializing S3 client: {str(e)}")
            return False

        # Upload to S3 (the helper logs and swallows its own errors)
        logger.info("Starting upload to S3...")
        upload_success = upload_directory_to_s3(
            s3_client,
            local_model_path,
            bucket_name,
            s3_prefix
        )

        if not upload_success:
            logger.error("Upload to S3 failed")
            return False

        logger.info(f"Model successfully uploaded to {s3_path}")
        return True
def upload_directory_to_s3(s3_client, local_directory, bucket_name, s3_prefix):
    """
    Upload a directory to S3 recursively.

    Args:
        s3_client: boto3 S3 client used for ``head_bucket`` and
            ``upload_file``.
        local_directory: Root directory (str or Path) whose files are
            uploaded.
        bucket_name: Name of the destination bucket.
        s3_prefix: Key prefix placed in front of each file's path relative
            to ``local_directory``; may be empty.

    Returns:
        True if every file was uploaded. False if the local directory is
        missing, the bucket check fails, or any upload fails (the walk
        stops at the first failure). Errors are logged, never raised.
    """
    local_root = Path(local_directory)
    try:
        # A missing directory would otherwise walk zero files and be
        # reported as a successful upload.
        if not local_root.is_dir():
            logger.error(f"Local directory {local_root} does not exist")
            return False

        # Check if bucket exists
        try:
            s3_client.head_bucket(Bucket=bucket_name)
        except ClientError as e:
            if e.response['Error']['Code'] == '404':
                logger.error(f"Bucket {bucket_name} does not exist")
            else:
                logger.error(f"Error checking bucket: {str(e)}")
            return False

        # Walk through local directory and upload files
        uploaded_count = 0
        for local_file in local_root.rglob('*'):
            if not local_file.is_file():
                continue

            # Calculate relative path for S3 key; as_posix() yields
            # forward slashes on every platform without touching
            # backslashes that are part of a POSIX filename.
            relative_path = local_file.relative_to(local_root)
            s3_key = (Path(s3_prefix) / relative_path).as_posix()

            logger.info(f"Uploading {local_file.name}...")
            try:
                s3_client.upload_file(
                    str(local_file),
                    bucket_name,
                    s3_key
                )
            except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
                # boto3's managed transfer reports failed uploads as
                # S3UploadFailedError, which is not a ClientError.
                logger.error(f"Failed to upload {local_file}: {str(e)}")
                return False
            logger.debug(f"Uploaded: {s3_key}")
            uploaded_count += 1

        if uploaded_count == 0:
            logger.warning(f"No files found under {local_root}; nothing was uploaded")
        logger.info("All files uploaded successfully")
        return True
    except Exception as e:
        logger.error(f"Error during S3 upload: {str(e)}")
        return False
def verify_upload(bucket_name, s3_prefix, region_name='us-west-2'):
    """
    Verify that the model files were uploaded correctly.

    Lists every object under ``s3_prefix`` in ``bucket_name``, logs the
    total count and the first five keys with their sizes.

    Args:
        bucket_name: Bucket to inspect.
        s3_prefix: Key prefix the model files were uploaded under.
        region_name: AWS region used to create the S3 client.

    Returns:
        True if at least one object exists under the prefix, False if
        none were found or the listing failed. Errors are logged, never
        raised.
    """
    preview_limit = 5  # number of keys echoed to the log
    try:
        s3_client = boto3.client('s3', region_name=region_name)
        logger.info("Verifying upload...")

        # Paginate so the reported count covers every object instead of
        # being capped by a single page's MaxKeys.
        paginator = s3_client.get_paginator('list_objects_v2')
        total_files = 0
        preview = []
        for page in paginator.paginate(Bucket=bucket_name, Prefix=s3_prefix):
            for obj in page.get('Contents', []):
                total_files += 1
                if len(preview) < preview_limit:
                    preview.append(obj)

        if total_files == 0:
            logger.warning("No files found in S3 location")
            return False

        logger.info(f"Found {total_files} files in S3")
        for obj in preview:  # Show first 5 files
            logger.info(f" - {obj['Key']} ({obj['Size']} bytes)")
        if total_files > preview_limit:
            logger.info(f" ... and {total_files - preview_limit} more files")
        return True
    except Exception as e:
        logger.error(f"Error verifying upload: {str(e)}")
        return False
def main():
    """
    Run the download/upload, then verify the result in S3.

    Returns:
        Process exit status: 0 when the upload succeeded and verification
        found files, 1 otherwise.
    """
    logger.info("Starting Phi-4-mini-instruct model download and S3 upload process")

    # No dependency check here: boto3 and huggingface_hub are imported at
    # module level, so a missing package already fails before this runs.

    # Download and upload model
    if not download_and_upload_model():
        logger.error("Process failed. Please check the logs above for details.")
        return 1

    # Verify the upload.
    # TODO: bucket_name is an empty placeholder; it must name the bucket
    # that download_and_upload_model() uploads to.
    bucket_name = ""
    s3_prefix = "models/microsoft/Phi-4-mini-instruct/"
    if not verify_upload(bucket_name, s3_prefix):
        # Do not claim success when nothing could be confirmed in S3.
        logger.error("Upload verification failed. Please check the logs above for details.")
        return 1

    logger.info("Process completed successfully!")
    # Build the location from the configured values so the message cannot
    # drift from what was actually verified.
    logger.info(f"Model is now available at: s3://{bucket_name}/{s3_prefix}")
    return 0


if __name__ == "__main__":
    # SystemExit works without the site-provided exit() helper.
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment