Last active
December 14, 2023 08:59
-
-
Save MaxHalford/9fba0c2d6800d0f0643902bf57b99780 to your computer and use it in GitHub Desktop.
Decomposition without and with gaps
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Ratio decomposition" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Without gaps" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Some dummy data." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>period</th>\n", | |
| " <th>group</th>\n", | |
| " <th>volume</th>\n", | |
| " <th>rate</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>A</td>\n", | |
| " <td>1000</td>\n", | |
| " <td>3.7</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>B</td>\n", | |
| " <td>900</td>\n", | |
| " <td>3.5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>C</td>\n", | |
| " <td>1200</td>\n", | |
| " <td>4.5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>A</td>\n", | |
| " <td>1000</td>\n", | |
| " <td>6.4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>B</td>\n", | |
| " <td>900</td>\n", | |
| " <td>3.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>C</td>\n", | |
| " <td>800</td>\n", | |
| " <td>4.6</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " period group volume rate\n", | |
| "0 1999 A 1000 3.7\n", | |
| "1 1999 B 900 3.5\n", | |
| "2 1999 C 1200 4.5\n", | |
| "3 2000 A 1000 6.4\n", | |
| "4 2000 B 900 3.9\n", | |
| "5 2000 C 800 4.6" | |
| ] | |
| }, | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(data := pd.DataFrame(\n", | |
| " [\n", | |
| " (1999, 'A', 1000, 3.7),\n", | |
| " (1999, 'B', 900, 3.5),\n", | |
| " (1999, 'C', 1200, 4.5),\n", | |
| " (2000, 'A', 1000, 6.4),\n", | |
| " (2000, 'B', 900, 3.9),\n", | |
| " (2000, 'C', 800, 4.6),\n", | |
| " ],\n", | |
| " columns=['period', 'group', 'volume', 'rate']\n", | |
| "))\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Aggregated rates at each period:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "period\n", | |
| "1999 3.951613\n", | |
| "2000 5.033333\n", | |
| "dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(period_rates := (\n", | |
| " data\n", | |
| " .groupby('period')\n", | |
| " .apply(lambda x: x['rate'] * x['volume'] / x['volume'].sum()).groupby('period')\n", | |
| " .sum()\n", | |
| "))\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Rate difference between periods to explain:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "period\n", | |
| "1999 NaN\n", | |
| "2000 1.08172\n", | |
| "dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "period_rates.diff()\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "We want to explain the difference through the inner and mix contributions of each group.\n", | |
| "\n", | |
| "Implementation available here: https://maxhalford.github.io/blog/kpi-evolution-decomposition/#implementation-1" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 31, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>period</th>\n", | |
| " <th>group</th>\n", | |
| " <th>volume</th>\n", | |
| " <th>rate</th>\n", | |
| " <th>total</th>\n", | |
| " <th>period_rate</th>\n", | |
| " <th>share</th>\n", | |
| " <th>rate_lag</th>\n", | |
| " <th>share_lag</th>\n", | |
| " <th>period_rate_lag</th>\n", | |
| " <th>inner</th>\n", | |
| " <th>mix</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>A</td>\n", | |
| " <td>1000</td>\n", | |
| " <td>3.7</td>\n", | |
| " <td>3700.0</td>\n", | |
| " <td>3.951613</td>\n", | |
| " <td>0.322581</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>B</td>\n", | |
| " <td>900</td>\n", | |
| " <td>3.5</td>\n", | |
| " <td>3150.0</td>\n", | |
| " <td>3.951613</td>\n", | |
| " <td>0.290323</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>C</td>\n", | |
| " <td>1200</td>\n", | |
| " <td>4.5</td>\n", | |
| " <td>5400.0</td>\n", | |
| " <td>3.951613</td>\n", | |
| " <td>0.387097</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>A</td>\n", | |
| " <td>1000</td>\n", | |
| " <td>6.4</td>\n", | |
| " <td>6400.0</td>\n", | |
| " <td>5.033333</td>\n", | |
| " <td>0.370370</td>\n", | |
| " <td>3.7</td>\n", | |
| " <td>0.322581</td>\n", | |
| " <td>3.951613</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.176822</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>B</td>\n", | |
| " <td>900</td>\n", | |
| " <td>3.9</td>\n", | |
| " <td>3510.0</td>\n", | |
| " <td>5.033333</td>\n", | |
| " <td>0.333333</td>\n", | |
| " <td>3.5</td>\n", | |
| " <td>0.290323</td>\n", | |
| " <td>3.951613</td>\n", | |
| " <td>0.133333</td>\n", | |
| " <td>0.150538</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>C</td>\n", | |
| " <td>800</td>\n", | |
| " <td>4.6</td>\n", | |
| " <td>3680.0</td>\n", | |
| " <td>5.033333</td>\n", | |
| " <td>0.296296</td>\n", | |
| " <td>4.5</td>\n", | |
| " <td>0.387097</td>\n", | |
| " <td>3.951613</td>\n", | |
| " <td>0.029630</td>\n", | |
| " <td>-0.408602</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " period group volume rate total period_rate share rate_lag \\\n", | |
| "0 1999 A 1000 3.7 3700.0 3.951613 0.322581 NaN \n", | |
| "1 1999 B 900 3.5 3150.0 3.951613 0.290323 NaN \n", | |
| "2 1999 C 1200 4.5 5400.0 3.951613 0.387097 NaN \n", | |
| "3 2000 A 1000 6.4 6400.0 5.033333 0.370370 3.7 \n", | |
| "4 2000 B 900 3.9 3510.0 5.033333 0.333333 3.5 \n", | |
| "5 2000 C 800 4.6 3680.0 5.033333 0.296296 4.5 \n", | |
| "\n", | |
| " share_lag period_rate_lag inner mix \n", | |
| "0 NaN NaN NaN NaN \n", | |
| "1 NaN NaN NaN NaN \n", | |
| "2 NaN NaN NaN NaN \n", | |
| "3 0.322581 3.951613 1.000000 0.176822 \n", | |
| "4 0.290323 3.951613 0.133333 0.150538 \n", | |
| "5 0.387097 3.951613 0.029630 -0.408602 " | |
| ] | |
| }, | |
| "execution_count": 31, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "decomp = data.copy()\n", | |
| "decomp['total'] = decomp.eval('volume * rate')\n", | |
| "decomp['period_rate'] = (\n", | |
| " decomp.groupby('period')['total'].transform('sum') /\n", | |
| " decomp.groupby('period')['volume'].transform('sum')\n", | |
| ")\n", | |
| "decomp['share'] = (\n", | |
| " decomp['volume'] /\n", | |
| " decomp.groupby('period')['volume'].transform('sum')\n", | |
| ")\n", | |
| "decomp['rate_lag'] = decomp.groupby('group')['rate'].shift(1)\n", | |
| "decomp['share_lag'] = decomp.groupby('group')['share'].shift(1)\n", | |
| "decomp['period_rate_lag'] = decomp.groupby('group')['period_rate'].shift(1)\n", | |
| "decomp['inner'] = decomp.eval('share * (rate - rate_lag)')\n", | |
| "decomp['mix'] = decomp.eval('(share - share_lag) * rate_lag')\n", | |
| "decomp" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "We can verify the contributions sum up to the rate difference:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 32, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Inner: 1.163\n", | |
| "Mix: -0.081\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import math\n", | |
| "\n", | |
| "assert math.isclose(decomp['inner'].sum() + decomp['mix'].sum(), period_rates.diff().sum())\n", | |
| "print(\n", | |
| " f'Inner: {decomp[\"inner\"].sum():.3f}\\n'\n", | |
| " f'Mix: {decomp[\"mix\"].sum():.3f}'\n", | |
| ")\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## With gaps" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>period</th>\n", | |
| " <th>group</th>\n", | |
| " <th>volume</th>\n", | |
| " <th>rate</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>A</td>\n", | |
| " <td>1000</td>\n", | |
| " <td>3.7</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>C</td>\n", | |
| " <td>1200</td>\n", | |
| " <td>4.5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>B</td>\n", | |
| " <td>900</td>\n", | |
| " <td>3.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>C</td>\n", | |
| " <td>800</td>\n", | |
| " <td>4.6</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " period group volume rate\n", | |
| "0 1999 A 1000 3.7\n", | |
| "1 1999 C 1200 4.5\n", | |
| "2 2000 B 900 3.9\n", | |
| "3 2000 C 800 4.6" | |
| ] | |
| }, | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(data := pd.DataFrame(\n", | |
| " [\n", | |
| " (1999, 'A', 1000, 3.7),\n", | |
| " (1999, 'C', 1200, 4.5),\n", | |
| " (2000, 'B', 900, 3.9),\n", | |
| " (2000, 'C', 800, 4.6),\n", | |
| " ],\n", | |
| " columns=['period', 'group', 'volume', 'rate']\n", | |
| "))\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "The trick is to add a row for each period where the group is not present." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>period</th>\n", | |
| " <th>group</th>\n", | |
| " <th>volume</th>\n", | |
| " <th>rate</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>A</td>\n", | |
| " <td>1000.0</td>\n", | |
| " <td>3.7</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>C</td>\n", | |
| " <td>1200.0</td>\n", | |
| " <td>4.5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>B</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>A</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>C</td>\n", | |
| " <td>800.0</td>\n", | |
| " <td>4.6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>B</td>\n", | |
| " <td>900.0</td>\n", | |
| " <td>3.9</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " period group volume rate\n", | |
| "0 1999 A 1000.0 3.7\n", | |
| "1 1999 C 1200.0 4.5\n", | |
| "2 1999 B 0.0 0.0\n", | |
| "3 2000 A 0.0 0.0\n", | |
| "4 2000 C 800.0 4.6\n", | |
| "5 2000 B 900.0 3.9" | |
| ] | |
| }, | |
| "execution_count": 34, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "periods = pd.Series(data.period.unique(), name='period').to_frame()\n", | |
| "groups = pd.Series(data.group.unique(), name='group').to_frame()\n", | |
| "product = periods.merge(groups, how='cross')\n", | |
| "data = pd.merge(data, product, left_on=['period', 'group'], right_on=['period', 'group'], how='right')\n", | |
| "data['volume'] = data['volume'].fillna(0)\n", | |
| "data['rate'] = data['rate'].fillna(0)\n", | |
| "data\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "period\n", | |
| "1999 4.136364\n", | |
| "2000 4.229412\n", | |
| "dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(period_rates := (\n", | |
| " data\n", | |
| " .groupby('period')\n", | |
| " .apply(lambda x: x['rate'] * x['volume'] / x['volume'].sum()).groupby('period')\n", | |
| " .sum()\n", | |
| "))\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 36, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "period\n", | |
| "1999 NaN\n", | |
| "2000 0.093048\n", | |
| "dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 36, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "period_rates.diff()\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>period</th>\n", | |
| " <th>group</th>\n", | |
| " <th>volume</th>\n", | |
| " <th>rate</th>\n", | |
| " <th>total</th>\n", | |
| " <th>period_rate</th>\n", | |
| " <th>share</th>\n", | |
| " <th>rate_lag</th>\n", | |
| " <th>share_lag</th>\n", | |
| " <th>period_rate_lag</th>\n", | |
| " <th>inner</th>\n", | |
| " <th>mix</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>A</td>\n", | |
| " <td>1000.0</td>\n", | |
| " <td>3.7</td>\n", | |
| " <td>3700.0</td>\n", | |
| " <td>4.136364</td>\n", | |
| " <td>0.454545</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>C</td>\n", | |
| " <td>1200.0</td>\n", | |
| " <td>4.5</td>\n", | |
| " <td>5400.0</td>\n", | |
| " <td>4.136364</td>\n", | |
| " <td>0.545455</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1999</td>\n", | |
| " <td>B</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>4.136364</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>A</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>4.229412</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>3.7</td>\n", | |
| " <td>0.454545</td>\n", | |
| " <td>4.136364</td>\n", | |
| " <td>-0.000000</td>\n", | |
| " <td>-1.681818</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>C</td>\n", | |
| " <td>800.0</td>\n", | |
| " <td>4.6</td>\n", | |
| " <td>3680.0</td>\n", | |
| " <td>4.229412</td>\n", | |
| " <td>0.470588</td>\n", | |
| " <td>4.5</td>\n", | |
| " <td>0.545455</td>\n", | |
| " <td>4.136364</td>\n", | |
| " <td>0.047059</td>\n", | |
| " <td>-0.336898</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>2000</td>\n", | |
| " <td>B</td>\n", | |
| " <td>900.0</td>\n", | |
| " <td>3.9</td>\n", | |
| " <td>3510.0</td>\n", | |
| " <td>4.229412</td>\n", | |
| " <td>0.529412</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>4.136364</td>\n", | |
| " <td>2.064706</td>\n", | |
| " <td>0.000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " period group volume rate total period_rate share rate_lag \\\n", | |
| "0 1999 A 1000.0 3.7 3700.0 4.136364 0.454545 NaN \n", | |
| "1 1999 C 1200.0 4.5 5400.0 4.136364 0.545455 NaN \n", | |
| "2 1999 B 0.0 0.0 0.0 4.136364 0.000000 NaN \n", | |
| "3 2000 A 0.0 0.0 0.0 4.229412 0.000000 3.7 \n", | |
| "4 2000 C 800.0 4.6 3680.0 4.229412 0.470588 4.5 \n", | |
| "5 2000 B 900.0 3.9 3510.0 4.229412 0.529412 0.0 \n", | |
| "\n", | |
| " share_lag period_rate_lag inner mix \n", | |
| "0 NaN NaN NaN NaN \n", | |
| "1 NaN NaN NaN NaN \n", | |
| "2 NaN NaN NaN NaN \n", | |
| "3 0.454545 4.136364 -0.000000 -1.681818 \n", | |
| "4 0.545455 4.136364 0.047059 -0.336898 \n", | |
| "5 0.000000 4.136364 2.064706 0.000000 " | |
| ] | |
| }, | |
| "execution_count": 39, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "decomp = data.copy()\n", | |
| "decomp['total'] = decomp.eval('volume * rate')\n", | |
| "decomp['period_rate'] = (\n", | |
| " decomp.groupby('period')['total'].transform('sum') /\n", | |
| " decomp.groupby('period')['volume'].transform('sum')\n", | |
| ")\n", | |
| "decomp['share'] = (\n", | |
| " decomp['volume'] /\n", | |
| " decomp.groupby('period')['volume'].transform('sum')\n", | |
| ")\n", | |
| "decomp['rate_lag'] = decomp.groupby('group')['rate'].shift(1)\n", | |
| "decomp['share_lag'] = decomp.groupby('group')['share'].shift(1)\n", | |
| "decomp['period_rate_lag'] = decomp.groupby('group')['period_rate'].shift(1)\n", | |
| "decomp['inner'] = decomp.eval('share * (rate - rate_lag)')\n", | |
| "decomp['mix'] = decomp.eval('(share - share_lag) * rate_lag')\n", | |
| "decomp\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Inner: 2.112\n", | |
| "Mix: -2.019\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import math\n", | |
| "\n", | |
| "assert math.isclose(decomp['inner'].sum() + decomp['mix'].sum(), period_rates.diff().sum())\n", | |
| "print(\n", | |
| " f'Inner: {decomp.query(\"period == 2000\")[\"inner\"].sum():.3f}\\n'\n", | |
| " f'Mix: {decomp.query(\"period == 2000\")[\"mix\"].sum():.3f}'\n", | |
| ")\n" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.11.0" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment