Created
July 12, 2016 23:44
-
-
Save sinhrks/104b831cbb680f9c1beb76534c742fdf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Q. 指定したキーが変わる度にナンバリングしたい\n", | |
| "\n", | |
| "groupbyで分けたグループに連番を振るようなイメージ (ただし、連続して同じキーを持つ行のまとまりを 1 グループとみなす)\n", | |
| "\n", | |
| "```\n", | |
| "result:\n", | |
| "-------\n", | |
| "\tc1\tc2\tc3\tgroup\n", | |
| "0\ta\ta\ta\t0\n", | |
| "1\ta\ta\ta\t0\n", | |
| "2\ta\ta\tb\t1\n", | |
| "3\ta\tb\tb\t2\n", | |
| "4\ta\tb\tb\t2\n", | |
| "5\tb\tb\tc\t3\n", | |
| "6\tb\tc\tc\t4\n", | |
| "7\tb\tc\tc\t4\n", | |
| "8\tb\tc\tc\t4\n", | |
| "```\n", | |
| "\n", | |
| "https://gist.github.com/KazukiOnodera/ed158c161ede48067c475b6406bef330\n", | |
| "\n", | |
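| "## A\n", | |
| "\n", | |
| "`shift(1)` で 1 行前の値と比較し、いずれかの列が変化した行を `any(axis=1)` で `True` にする。これを `cumsum()` で累積するとグループ番号になる (先頭行は必ず `True` になるので、1 を引いて 0 始まりにする)。" | |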
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.DataFrame([['a', 'a', 'a'],\n", | |
| " ['a', 'a', 'a'],\n", | |
| " ['a', 'a', 'b'],\n", | |
| " ['a', 'b', 'b'],\n", | |
| " ['a', 'b', 'b'],\n", | |
| " ['b', 'b', 'c'],\n", | |
| " ['b', 'c', 'c'],\n", | |
| " ['b', 'c', 'c'],\n", | |
| " ['b', 'c', 'c']],\n", | |
| " columns=['c1','c2','c3'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>c1</th>\n", | |
| " <th>c2</th>\n", | |
| " <th>c3</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " <td>b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " <td>b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>b</td>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " <td>c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " <td>c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " <td>c</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " c1 c2 c3\n", | |
| "0 a a a\n", | |
| "1 a a a\n", | |
| "2 a a b\n", | |
| "3 a b b\n", | |
| "4 a b b\n", | |
| "5 b b c\n", | |
| "6 b c c\n", | |
| "7 b c c\n", | |
| "8 b c c" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>c1</th>\n", | |
| " <th>c2</th>\n", | |
| " <th>c3</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " <td>b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " <td>b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>b</td>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " <td>c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " <td>c</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " c1 c2 c3\n", | |
| "0 NaN NaN NaN\n", | |
| "1 a a a\n", | |
| "2 a a a\n", | |
| "3 a a b\n", | |
| "4 a b b\n", | |
| "5 a b b\n", | |
| "6 b b c\n", | |
| "7 b c c\n", | |
| "8 b c c" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.shift(1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>c1</th>\n", | |
| " <th>c2</th>\n", | |
| " <th>c3</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " c1 c2 c3\n", | |
| "0 True True True\n", | |
| "1 False False False\n", | |
| "2 False False True\n", | |
| "3 False True False\n", | |
| "4 False False False\n", | |
| "5 True False True\n", | |
| "6 False True False\n", | |
| "7 False False False\n", | |
| "8 False False False" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df != df.shift(1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0 True\n", | |
| "1 False\n", | |
| "2 True\n", | |
| "3 True\n", | |
| "4 False\n", | |
| "5 True\n", | |
| "6 True\n", | |
| "7 False\n", | |
| "8 False\n", | |
| "dtype: bool" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(df != df.shift(1)).any(axis=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0 0\n", | |
| "1 0\n", | |
| "2 1\n", | |
| "3 2\n", | |
| "4 2\n", | |
| "5 3\n", | |
| "6 4\n", | |
| "7 4\n", | |
| "8 4\n", | |
| "dtype: int32" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(df != df.shift(1)).any(axis=1).cumsum() - 1" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>c1</th>\n", | |
| " <th>c2</th>\n", | |
| " <th>c3</th>\n", | |
| " <th>group</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>a</td>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " <td>b</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>a</td>\n", | |
| " <td>b</td>\n", | |
| " <td>b</td>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>b</td>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " <td>c</td>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " <td>c</td>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>b</td>\n", | |
| " <td>c</td>\n", | |
| " <td>c</td>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " c1 c2 c3 group\n", | |
| "0 a a a 0\n", | |
| "1 a a a 0\n", | |
| "2 a a b 1\n", | |
| "3 a b b 2\n", | |
| "4 a b b 2\n", | |
| "5 b b c 3\n", | |
| "6 b c c 4\n", | |
| "7 b c c 4\n", | |
| "8 b c c 4" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df['group'] = (df != df.shift(1)).any(axis=1).cumsum() - 1\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment