Created
November 13, 2024 17:37
-
-
Save madhurprash/e06d3d10cb942e7fd7c089acf282d844 to your computer and use it in GitHub Desktop.
In this gist, we do the following: set up `mlflow` tracking with the SageMaker tracking server, create a simple classification experiment, and use MLflow to log inputs, metrics, parameters, and the model with simple API calls.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Experiment logging with MLflow in SageMaker\n", | |
| "---\n", | |
| "\n", | |
| "In this gist, we do the following:\n", | |
| "\n", | |
| "1. Set up `mlflow` tracking with the SageMaker tracking server.\n", | |
| "\n", | |
| "1. Create a simple classification experiment.\n", | |
| "\n", | |
| "1. Use MLflow to log inputs, metrics, parameters, and the model with simple API calls.\n", | |
| "\n", | |
| "View more about LLM experimentation with MLflow on SageMaker here: https://aws.amazon.com/blogs/machine-learning/llm-experimentation-at-scale-using-amazon-sagemaker-pipelines-and-mlflow/" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# import necessary libraries\n", | |
| "import mlflow\n", | |
| "import logging\n", | |
| "import numpy as np\n", | |
| "import pandas as pd\n", | |
| "from sklearn.model_selection import train_test_split\n", | |
| "from sklearn.ensemble import RandomForestClassifier\n", | |
| "from sklearn.metrics import accuracy_score, precision_score, recall_score\n", | |
| "\n", | |
| "# set a logger\n", | |
| "logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)\n", | |
| "logger = logging.getLogger(__name__)\n", | |
| "\n", | |
| "# Set up MLflow tracking\n", | |
| "# Replace with your MLflow tracking server ARN - you can find it in\n", | |
| "# SageMaker Studio. View more about LLM experimentation with MLflow on SageMaker\n", | |
| "# here: https://aws.amazon.com/blogs/machine-learning/llm-experimentation-at-scale-using-amazon-sagemaker-pipelines-and-mlflow/\n", | |
| "mlflow_arn = \"<your-mlflow-tracking-arn>\" \n", | |
| "experiment_name = \"simple_classification_experiment\"\n", | |
| "\n", | |
| "# Set the mlflow tracking uri and experiment name\n", | |
| "mlflow.set_tracking_uri(mlflow_arn)\n", | |
| "mlflow.set_experiment(experiment_name)\n", | |
| "\n", | |
| "# Generate sample data - this is dummy data that we will use\n", | |
| "X = np.random.randn(100, 4)\n", | |
| "y = np.random.randint(0, 2, 100)\n", | |
| "\n", | |
| "# Split the data into train, test datasets\n", | |
| "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", | |
| "\n", | |
| "# Start MLflow run\n", | |
| "with mlflow.start_run(run_name=\"random_forest_run\") as run:\n", | |
| " # Log input data as pandas DataFrames\n", | |
| " train_df = pd.DataFrame(X_train)\n", | |
| " test_df = pd.DataFrame(X_test)\n", | |
| " \n", | |
| " train_data = mlflow.data.from_pandas(train_df, source=\"train_data\")\n", | |
| " test_data = mlflow.data.from_pandas(test_df, source=\"test_data\")\n", | |
| " \n", | |
| " # Use the mlflow.log_input API to log the datasets built from the DataFrames\n", | |
| " mlflow.log_input(train_data, context=\"training\")\n", | |
| " mlflow.log_input(test_data, context=\"testing\")\n", | |
| " \n", | |
| " # Set and log parameters\n", | |
| " params = {\n", | |
| " \"n_estimators\": 100,\n", | |
| " \"max_depth\": 5,\n", | |
| " \"random_state\": 42\n", | |
| " }\n", | |
| "\n", | |
| " # Use mlflow to log parameters\n", | |
| " mlflow.log_params(params)\n", | |
| " \n", | |
| " # Train model\n", | |
| " rf = RandomForestClassifier(**params)\n", | |
| " rf.fit(X_train, y_train)\n", | |
| " \n", | |
| " # Make predictions\n", | |
| " y_pred = rf.predict(X_test)\n", | |
| " \n", | |
| " # Calculate and log metrics\n", | |
| " metrics = {\n", | |
| " \"accuracy\": accuracy_score(y_test, y_pred),\n", | |
| " \"precision\": precision_score(y_test, y_pred),\n", | |
| " \"recall\": recall_score(y_test, y_pred)\n", | |
| " }\n", | |
| " mlflow.log_metrics(metrics)\n", | |
| " # Log the model\n", | |
| " mlflow.sklearn.log_model(rf, \"random_forest_model\")\n", | |
| "\n", | |
| "logger.info(f\"Run ID: {run.info.run_id}\")\n", | |
| "logger.info(f\"Experiment ID: {run.info.experiment_id}\")\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "base", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.11.7" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
View the output in MLflow below:

