Created
March 3, 2021 16:54
-
-
Save vschmidt94/91518e3c40e0f1284401297049439376 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Partition Pandas dataframe based on sentinel value\n", | |
| "\n", | |
| "First create some sample data - simulate what is retrieved from database" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>timestamp</th>\n", | |
| " <th>motor_rpm</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <td>0</td>\n", | |
| " <td>2021-03-03 15:52:28.312731</td>\n", | |
| " <td>370</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>1</td>\n", | |
| " <td>2021-03-03 15:53:28.312731</td>\n", | |
| " <td>290</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>2</td>\n", | |
| " <td>2021-03-03 15:54:28.312731</td>\n", | |
| " <td>130</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>3</td>\n", | |
| " <td>2021-03-03 15:55:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>4</td>\n", | |
| " <td>2021-03-03 15:56:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>995</td>\n", | |
| " <td>2021-03-04 08:27:28.312731</td>\n", | |
| " <td>610</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>996</td>\n", | |
| " <td>2021-03-04 08:28:28.312731</td>\n", | |
| " <td>260</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>997</td>\n", | |
| " <td>2021-03-04 08:29:28.312731</td>\n", | |
| " <td>210</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>998</td>\n", | |
| " <td>2021-03-04 08:30:28.312731</td>\n", | |
| " <td>1260</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>999</td>\n", | |
| " <td>2021-03-04 08:31:28.312731</td>\n", | |
| " <td>1040</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>1000 rows × 2 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " timestamp motor_rpm\n", | |
| "0 2021-03-03 15:52:28.312731 370\n", | |
| "1 2021-03-03 15:53:28.312731 290\n", | |
| "2 2021-03-03 15:54:28.312731 130\n", | |
| "3 2021-03-03 15:55:28.312731 0\n", | |
| "4 2021-03-03 15:56:28.312731 0\n", | |
| ".. ... ...\n", | |
| "995 2021-03-04 08:27:28.312731 610\n", | |
| "996 2021-03-04 08:28:28.312731 260\n", | |
| "997 2021-03-04 08:29:28.312731 210\n", | |
| "998 2021-03-04 08:30:28.312731 1260\n", | |
| "999 2021-03-04 08:31:28.312731 1040\n", | |
| "\n", | |
| "[1000 rows x 2 columns]" | |
| ] | |
| }, | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import random, datetime\n", | |
| "import pandas\n", | |
| "\n", | |
| "d = {'timestamp': [], 'motor_rpm': []}\n", | |
| "now = datetime.datetime.utcnow()\n", | |
| "for i in range(1000):\n", | |
| " rpm = random.randrange(100,1500, 10)\n", | |
| " d['timestamp'].append(now)\n", | |
| " d['motor_rpm'].append(rpm)\n", | |
| " now += datetime.timedelta(minutes=1)\n", | |
| " \n", | |
| "# Insert 5 \"events\" where motor rpm goes to zero\n", | |
| "# An \"event\" is where we see 1 or more contiguous readings of zero rpm.\n", | |
| "# i.e., if the motor stops - it could be down for a minute or several minutes\n", | |
| "zero_rpm_indices = [3,4,\n", | |
| " 299,\n", | |
| " 466, 467, 468, 469,\n", | |
| " 700, 701,\n", | |
| " 950, 951, 952, 953, 954]\n", | |
| "for idx in zero_rpm_indices:\n", | |
| " d['motor_rpm'][idx] = 0\n", | |
| "\n", | |
| "df = pandas.DataFrame.from_dict(d)\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Partitioning\n", | |
| "\n", | |
| "Now that we have a reasonable facsimile of the sensor data, the problem is identifying partition boundaries.\n", | |
| "\n", | |
| "The 0 values in `motor_rpm` are our sentinels - but note we may have a continguous series of the sentinel value/\n", | |
| "\n", | |
| "For my purposes, I define a partition as:\n", | |
| " 1. Starting at the first non-zero RPM value following a 0 rpm record. In the case the first record is non-zero RPM, that will also begin a partition\n", | |
| " 2. Ending at the last occurrence of a zero RPM value before a non-zero RPM value. In all cases, the last record ends the last partition." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>timestamp</th>\n", | |
| " <th>motor_rpm</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <td>3</td>\n", | |
| " <td>2021-03-03 15:55:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>4</td>\n", | |
| " <td>2021-03-03 15:56:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>299</td>\n", | |
| " <td>2021-03-03 20:51:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>466</td>\n", | |
| " <td>2021-03-03 23:38:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>467</td>\n", | |
| " <td>2021-03-03 23:39:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>468</td>\n", | |
| " <td>2021-03-03 23:40:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>469</td>\n", | |
| " <td>2021-03-03 23:41:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>700</td>\n", | |
| " <td>2021-03-04 03:32:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>701</td>\n", | |
| " <td>2021-03-04 03:33:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>950</td>\n", | |
| " <td>2021-03-04 07:42:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>951</td>\n", | |
| " <td>2021-03-04 07:43:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>952</td>\n", | |
| " <td>2021-03-04 07:44:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>953</td>\n", | |
| " <td>2021-03-04 07:45:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>954</td>\n", | |
| " <td>2021-03-04 07:46:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " timestamp motor_rpm\n", | |
| "3 2021-03-03 15:55:28.312731 0\n", | |
| "4 2021-03-03 15:56:28.312731 0\n", | |
| "299 2021-03-03 20:51:28.312731 0\n", | |
| "466 2021-03-03 23:38:28.312731 0\n", | |
| "467 2021-03-03 23:39:28.312731 0\n", | |
| "468 2021-03-03 23:40:28.312731 0\n", | |
| "469 2021-03-03 23:41:28.312731 0\n", | |
| "700 2021-03-04 03:32:28.312731 0\n", | |
| "701 2021-03-04 03:33:28.312731 0\n", | |
| "950 2021-03-04 07:42:28.312731 0\n", | |
| "951 2021-03-04 07:43:28.312731 0\n", | |
| "952 2021-03-04 07:44:28.312731 0\n", | |
| "953 2021-03-04 07:45:28.312731 0\n", | |
| "954 2021-03-04 07:46:28.312731 0" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Step 1: Find the zero-rpm events\n", | |
| "zero_rpm = df.loc[df['motor_rpm'] == 0]\n", | |
| "zero_rpm" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "3 1\n", | |
| "4 1\n", | |
| "299 2\n", | |
| "466 3\n", | |
| "467 3\n", | |
| "468 3\n", | |
| "469 3\n", | |
| "700 4\n", | |
| "701 4\n", | |
| "950 5\n", | |
| "951 5\n", | |
| "952 5\n", | |
| "953 5\n", | |
| "954 5\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Step 2: Create a map of indexes to zero-rpm records\n", | |
| "# i.e., we'll see what partition each zero-rpm event 'belongs' to.\n", | |
| "m = zero_rpm.index.to_series().diff().ne(1).cumsum()\n", | |
| "m" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[(0, 4), (5, 299), (300, 469), (470, 701), (702, 954), (955, 999)]" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Step 3: Walk the map elements and use that to definepart partition start, stop tuples\n", | |
| "def partition(df):\n", | |
| " partitions = []\n", | |
| " current_event = 1\n", | |
| " partition_start_idx = 0\n", | |
| " partition_end_idx = 0\n", | |
| " for idx, event in m.items():\n", | |
| " if event != current_event:\n", | |
| " partitions.append((partition_start_idx, partition_end_idx))\n", | |
| " partition_start_idx = partition_end_idx + 1\n", | |
| " current_event = event\n", | |
| " partition_end_idx = idx\n", | |
| " \n", | |
| " # The last event falls outside the loop\n", | |
| " partitions.append((partition_start_idx, partition_end_idx))\n", | |
| "\n", | |
| " # Finally, capture all remaining records (if any) after the last event.\n", | |
| " if partition_end_idx < len(df.index) - 1:\n", | |
| " partitions.append((partition_end_idx + 1, len(df.index) - 1))\n", | |
| " \n", | |
| " return partitions\n", | |
| "\n", | |
| "partition(df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Check the edge cases\n", | |
| "\n", | |
| "That appears to work, but let's check edge cases where our data has zero-rpm event at beginning and/or end of the dataframe as well." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>timestamp</th>\n", | |
| " <th>motor_rpm</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <td>0</td>\n", | |
| " <td>2021-03-03 15:52:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>1</td>\n", | |
| " <td>2021-03-03 15:53:28.312731</td>\n", | |
| " <td>290</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>2</td>\n", | |
| " <td>2021-03-03 15:54:28.312731</td>\n", | |
| " <td>130</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>3</td>\n", | |
| " <td>2021-03-03 15:55:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>4</td>\n", | |
| " <td>2021-03-03 15:56:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>995</td>\n", | |
| " <td>2021-03-04 08:27:28.312731</td>\n", | |
| " <td>610</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>996</td>\n", | |
| " <td>2021-03-04 08:28:28.312731</td>\n", | |
| " <td>260</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>997</td>\n", | |
| " <td>2021-03-04 08:29:28.312731</td>\n", | |
| " <td>210</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>998</td>\n", | |
| " <td>2021-03-04 08:30:28.312731</td>\n", | |
| " <td>1260</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>999</td>\n", | |
| " <td>2021-03-04 08:31:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>1000 rows × 2 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " timestamp motor_rpm\n", | |
| "0 2021-03-03 15:52:28.312731 0\n", | |
| "1 2021-03-03 15:53:28.312731 290\n", | |
| "2 2021-03-03 15:54:28.312731 130\n", | |
| "3 2021-03-03 15:55:28.312731 0\n", | |
| "4 2021-03-03 15:56:28.312731 0\n", | |
| ".. ... ...\n", | |
| "995 2021-03-04 08:27:28.312731 610\n", | |
| "996 2021-03-04 08:28:28.312731 260\n", | |
| "997 2021-03-04 08:29:28.312731 210\n", | |
| "998 2021-03-04 08:30:28.312731 1260\n", | |
| "999 2021-03-04 08:31:28.312731 0\n", | |
| "\n", | |
| "[1000 rows x 2 columns]" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.at[0, 'motor_rpm'] = 0\n", | |
| "df.at[len(df.index) - 1, 'motor_rpm'] = 0\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[(0, 4), (5, 299), (300, 469), (470, 701), (702, 954), (955, 999)]" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "partitions = partition(df)\n", | |
| "partitions" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Next steps\n", | |
| "\n", | |
| "Now, I can analyze specific partitions as needed" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>timestamp</th>\n", | |
| " <th>motor_rpm</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <td>0</td>\n", | |
| " <td>2021-03-03 15:52:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>1</td>\n", | |
| " <td>2021-03-03 15:53:28.312731</td>\n", | |
| " <td>290</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>2</td>\n", | |
| " <td>2021-03-03 15:54:28.312731</td>\n", | |
| " <td>130</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>3</td>\n", | |
| " <td>2021-03-03 15:55:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>4</td>\n", | |
| " <td>2021-03-03 15:56:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>995</td>\n", | |
| " <td>2021-03-04 08:27:28.312731</td>\n", | |
| " <td>610</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>996</td>\n", | |
| " <td>2021-03-04 08:28:28.312731</td>\n", | |
| " <td>260</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>997</td>\n", | |
| " <td>2021-03-04 08:29:28.312731</td>\n", | |
| " <td>210</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>998</td>\n", | |
| " <td>2021-03-04 08:30:28.312731</td>\n", | |
| " <td>1260</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <td>999</td>\n", | |
| " <td>2021-03-04 08:31:28.312731</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>1000 rows × 2 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " timestamp motor_rpm\n", | |
| "0 2021-03-03 15:52:28.312731 0\n", | |
| "1 2021-03-03 15:53:28.312731 290\n", | |
| "2 2021-03-03 15:54:28.312731 130\n", | |
| "3 2021-03-03 15:55:28.312731 0\n", | |
| "4 2021-03-03 15:56:28.312731 0\n", | |
| ".. ... ...\n", | |
| "995 2021-03-04 08:27:28.312731 610\n", | |
| "996 2021-03-04 08:28:28.312731 260\n", | |
| "997 2021-03-04 08:29:28.312731 210\n", | |
| "998 2021-03-04 08:30:28.312731 1260\n", | |
| "999 2021-03-04 08:31:28.312731 0\n", | |
| "\n", | |
| "[1000 rows x 2 columns]" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Parition 0: df[0:5]\n", | |
| " motor_rpm\n", | |
| "count 5.000000\n", | |
| "mean 84.000000\n", | |
| "std 128.179562\n", | |
| "min 0.000000\n", | |
| "25% 0.000000\n", | |
| "50% 0.000000\n", | |
| "75% 130.000000\n", | |
| "max 290.000000\n", | |
| "\n", | |
| "Parition 1: df[5:300]\n", | |
| " motor_rpm\n", | |
| "count 295.000000\n", | |
| "mean 815.457627\n", | |
| "std 387.093272\n", | |
| "min 0.000000\n", | |
| "25% 500.000000\n", | |
| "50% 810.000000\n", | |
| "75% 1170.000000\n", | |
| "max 1490.000000\n", | |
| "\n", | |
| "Parition 2: df[300:470]\n", | |
| " motor_rpm\n", | |
| "count 170.000000\n", | |
| "mean 747.058824\n", | |
| "std 418.921759\n", | |
| "min 0.000000\n", | |
| "25% 410.000000\n", | |
| "50% 710.000000\n", | |
| "75% 1070.000000\n", | |
| "max 1490.000000\n", | |
| "\n", | |
| "Parition 3: df[470:702]\n", | |
| " motor_rpm\n", | |
| "count 232.000000\n", | |
| "mean 778.879310\n", | |
| "std 420.860326\n", | |
| "min 0.000000\n", | |
| "25% 430.000000\n", | |
| "50% 770.000000\n", | |
| "75% 1150.000000\n", | |
| "max 1490.000000\n", | |
| "\n", | |
| "Parition 4: df[702:955]\n", | |
| " motor_rpm\n", | |
| "count 253.000000\n", | |
| "mean 789.960474\n", | |
| "std 427.263810\n", | |
| "min 0.000000\n", | |
| "25% 410.000000\n", | |
| "50% 850.000000\n", | |
| "75% 1170.000000\n", | |
| "max 1480.000000\n", | |
| "\n", | |
| "Parition 5: df[955:1000]\n", | |
| " motor_rpm\n", | |
| "count 45.000000\n", | |
| "mean 732.444444\n", | |
| "std 439.633916\n", | |
| "min 0.000000\n", | |
| "25% 340.000000\n", | |
| "50% 630.000000\n", | |
| "75% 1040.000000\n", | |
| "max 1490.000000\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for i, (start, stop) in enumerate(partitions):\n", | |
| " print(f\"\\nParition {i}: df[{start}:{stop + 1}]\")\n", | |
| " part_df = df[start:stop + 1]\n", | |
| " print(part_df.describe())" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.4" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment