madhurprash · November 13, 2024 17:37 · madhurprash · Nov 13, 2024
diff --git a/sagemaker_mlflow_simple_example.ipynb b/sagemaker_mlflow_simple_example.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Experiment logging with MLflow in SageMaker\n",
    "---\n",
    "\n",
    "In this gist, we do the following:\n",
    "\n",
    "1. Set up `mlflow` tracking with the SageMaker tracking server.\n",
    "\n",
    "1. Create a simple classification experiment.\n",
    "\n",
    "1. Use MLFlow to input logs, metrics, parameters and model with simple API calls.\n",
    "\n",
    "View more about LLM experimentation with MLflow on SageMaker here: https://aws.amazon.com/blogs/machine-learning/llm-experimentation-at-scale-using-amazon-sagemaker-pipelines-and-mlflow/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import necessary libraries\n",
    "import mlflow\n",
    "import logging\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
    "\n",
    "# set a logger\n",
    "logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)\n",
    "logger = logging.getLogger(__name__)\n",
    "\n",
    "# Set up MLflow tracking\n",
    "# Replace with your MLflow ARN - you can find the MLFlow server\n",
    "# arn on sagemaker studio. View more about LLM experimentation with MLflow on SageMaker\n",
    "# here: https://aws.amazon.com/blogs/machine-learning/llm-experimentation-at-scale-using-amazon-sagemaker-pipelines-and-mlflow/\n",
    "mlflow_arn = \"<your-mlflow-tracking-arn\" \n",
    "experiment_name = \"simple_classification_experiment\"\n",
    "\n",
    "# Set the mlflow tracking uri and experiment name\n",
    "mlflow.set_tracking_uri(mlflow_arn)\n",
    "mlflow.set_experiment(experiment_name)\n",
    "\n",
    "# Generate sample data - this is dummy data that we will use\n",
    "X = np.random.randn(100, 4)\n",
    "y = np.random.randint(0, 2, 100)\n",
    "\n",
    "# Split the data into train, test datasets\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "# Start MLflow run\n",
    "with mlflow.start_run(run_name=\"random_forest_run\") as run:\n",
    "    # Log input data as pandas DataFrames\n",
    "    train_df = pd.DataFrame(X_train)\n",
    "    test_df = pd.DataFrame(X_test)\n",
    "    \n",
    "    train_data = mlflow.data.from_pandas(train_df, source=\"train_data\")\n",
    "    test_data = mlflow.data.from_pandas(test_df, source=\"test_data\")\n",
    "    \n",
    "    # Use the mlflow.log_input api to log input data as a df\n",
    "    mlflow.log_input(train_data, context=\"training\")\n",
    "    mlflow.log_input(test_data, context=\"testing\")\n",
    "    \n",
    "    # Set and log parameters\n",
    "    params = {\n",
    "        \"n_estimators\": 100,\n",
    "        \"max_depth\": 5,\n",
    "        \"random_state\": 42\n",
    "    }\n",
    "\n",
    "    # Use mlflow to log parameters\n",
    "    mlflow.log_params(params)\n",
    "    \n",
    "    # Train model\n",
    "    rf = RandomForestClassifier(**params)\n",
    "    rf.fit(X_train, y_train)\n",
    "    \n",
    "    # Make predictions\n",
    "    y_pred = rf.predict(X_test)\n",
    "    \n",
    "    # Calculate and log metrics\n",
    "    metrics = {\n",
    "        \"accuracy\": accuracy_score(y_test, y_pred),\n",
    "        \"precision\": precision_score(y_test, y_pred),\n",
    "        \"recall\": recall_score(y_test, y_pred)\n",
    "    }\n",
    "    mlflow.log_metrics(metrics)\n",
    "    # Log the model\n",
    "    mlflow.sklearn.log_model(rf, \"random_forest_model\")\n",
    "\n",
    "logger.info(f\"Run ID: {run.info.run_id}\")\n",
    "logger.info(f\"Experiment ID: {run.info.experiment_id}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Experiment logging with MLflow in SageMaker\n",
	"---\n",
	"\n",
	"In this gist, we do the following:\n",
	"\n",
	"1. Set up `mlflow` tracking with the SageMaker tracking server.\n",
	"\n",
	"1. Create a simple classification experiment.\n",
	"\n",
	"1. Use MLFlow to input logs, metrics, parameters and model with simple API calls.\n",
	"\n",
	"View more about LLM experimentation with MLflow on SageMaker here: https://aws.amazon.com/blogs/machine-learning/llm-experimentation-at-scale-using-amazon-sagemaker-pipelines-and-mlflow/"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# import necessary libraries\n",
	"import mlflow\n",
	"import logging\n",
	"import numpy as np\n",
	"import pandas as pd\n",
	"from sklearn.model_selection import train_test_split\n",
	"from sklearn.ensemble import RandomForestClassifier\n",
	"from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
	"\n",
	"# set a logger\n",
	"logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)\n",
	"logger = logging.getLogger(__name__)\n",
	"\n",
	"# Set up MLflow tracking\n",
	"# Replace with your MLflow ARN - you can find the MLFlow server\n",
	"# arn on sagemaker studio. View more about LLM experimentation with MLflow on SageMaker\n",
	"# here: https://aws.amazon.com/blogs/machine-learning/llm-experimentation-at-scale-using-amazon-sagemaker-pipelines-and-mlflow/\n",
	"mlflow_arn = \"<your-mlflow-tracking-arn\" \n",
	"experiment_name = \"simple_classification_experiment\"\n",
	"\n",
	"# Set the mlflow tracking uri and experiment name\n",
	"mlflow.set_tracking_uri(mlflow_arn)\n",
	"mlflow.set_experiment(experiment_name)\n",
	"\n",
	"# Generate sample data - this is dummy data that we will use\n",
	"X = np.random.randn(100, 4)\n",
	"y = np.random.randint(0, 2, 100)\n",
	"\n",
	"# Split the data into train, test datasets\n",
	"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
	"\n",
	"# Start MLflow run\n",
	"with mlflow.start_run(run_name=\"random_forest_run\") as run:\n",
	" # Log input data as pandas DataFrames\n",
	" train_df = pd.DataFrame(X_train)\n",
	" test_df = pd.DataFrame(X_test)\n",
	" \n",
	" train_data = mlflow.data.from_pandas(train_df, source=\"train_data\")\n",
	" test_data = mlflow.data.from_pandas(test_df, source=\"test_data\")\n",
	" \n",
	" # Use the mlflow.log_input api to log input data as a df\n",
	" mlflow.log_input(train_data, context=\"training\")\n",
	" mlflow.log_input(test_data, context=\"testing\")\n",
	" \n",
	" # Set and log parameters\n",
	" params = {\n",
	" \"n_estimators\": 100,\n",
	" \"max_depth\": 5,\n",
	" \"random_state\": 42\n",
	" }\n",
	"\n",
	" # Use mlflow to log parameters\n",
	" mlflow.log_params(params)\n",
	" \n",
	" # Train model\n",
	" rf = RandomForestClassifier(**params)\n",
	" rf.fit(X_train, y_train)\n",
	" \n",
	" # Make predictions\n",
	" y_pred = rf.predict(X_test)\n",
	" \n",
	" # Calculate and log metrics\n",
	" metrics = {\n",
	" \"accuracy\": accuracy_score(y_test, y_pred),\n",
	" \"precision\": precision_score(y_test, y_pred),\n",
	" \"recall\": recall_score(y_test, y_pred)\n",
	" }\n",
	" mlflow.log_metrics(metrics)\n",
	" # Log the model\n",
	" mlflow.sklearn.log_model(rf, \"random_forest_model\")\n",
	"\n",
	"logger.info(f\"Run ID: {run.info.run_id}\")\n",
	"logger.info(f\"Experiment ID: {run.info.experiment_id}\")\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "base",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.11.7"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
No results found