Created
February 17, 2017 14:10
-
-
Save mritchie712/d7ebd8a7f782f90bf8db7398d4662c4d to your computer and use it in GitHub Desktop.
CASE or IF/THEN/ELSE statements in Pandas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import random" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.DataFrame()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "i = 0\n", | |
| "while i < 10:\n", | |
| " df['col' + str(i)] = [random.randrange(1,100) for x in range(1,1000)]\n", | |
| " i+=1" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "df['colAB'] = 'A'\n", | |
| "df.loc[df.col0 > 75, 'colAB'] = 'B'\n", | |
| "df.loc[df.col8 > 75, 'colAB'] = None" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "df['caseWhen'] = 'X'\n", | |
| "df.loc[(df.col1 > 75)&(df.col2 < 75) , 'caseWhen'] = df[(df.col1 > 75)&(df.col2<25)].colAB\n", | |
| "df.loc[(df.col8 > 50)|(df.col9 > 50) , 'caseWhen'] = df.loc[(df.col8 > 50)|(df.col9>50)].colAB\n", | |
| "df.loc[(df.col6 > 20)&(df.colAB == 'A') , 'caseWhen'] = 'C'\n", | |
| "df.loc[~df.col4 > 70 , 'caseWhen'] = 'D'\n", | |
| "df.loc[df.colAB.isnull(), 'caseWhen'] = 'E'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "for col in df.columns[:5]:\n", | |
| " colMean = df[col].mean()\n", | |
| " newCol = col + '_GT' + str(int(colMean))\n", | |
| " df[newCol] = df[col] > colMean" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "C 472\n", | |
| "E 228\n", | |
| "B 124\n", | |
| "A 84\n", | |
| "X 83\n", | |
| "Name: caseWhen, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.caseWhen.value_counts()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>col0</th>\n", | |
| " <th>col1</th>\n", | |
| " <th>col2</th>\n", | |
| " <th>col3</th>\n", | |
| " <th>col4</th>\n", | |
| " <th>col5</th>\n", | |
| " <th>col6</th>\n", | |
| " <th>col7</th>\n", | |
| " <th>col8</th>\n", | |
| " <th>col9</th>\n", | |
| " <th>colAB</th>\n", | |
| " <th>caseWhen</th>\n", | |
| " <th>col0_GT50</th>\n", | |
| " <th>col1_GT48</th>\n", | |
| " <th>col2_GT51</th>\n", | |
| " <th>col3_GT49</th>\n", | |
| " <th>col4_GT49</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>43</td>\n", | |
| " <td>75</td>\n", | |
| " <td>78</td>\n", | |
| " <td>91</td>\n", | |
| " <td>37</td>\n", | |
| " <td>68</td>\n", | |
| " <td>51</td>\n", | |
| " <td>62</td>\n", | |
| " <td>14</td>\n", | |
| " <td>98</td>\n", | |
| " <td>A</td>\n", | |
| " <td>C</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>77</td>\n", | |
| " <td>59</td>\n", | |
| " <td>73</td>\n", | |
| " <td>32</td>\n", | |
| " <td>3</td>\n", | |
| " <td>95</td>\n", | |
| " <td>38</td>\n", | |
| " <td>31</td>\n", | |
| " <td>66</td>\n", | |
| " <td>82</td>\n", | |
| " <td>B</td>\n", | |
| " <td>B</td>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>99</td>\n", | |
| " <td>47</td>\n", | |
| " <td>82</td>\n", | |
| " <td>61</td>\n", | |
| " <td>49</td>\n", | |
| " <td>67</td>\n", | |
| " <td>50</td>\n", | |
| " <td>48</td>\n", | |
| " <td>19</td>\n", | |
| " <td>22</td>\n", | |
| " <td>B</td>\n", | |
| " <td>X</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>5</td>\n", | |
| " <td>39</td>\n", | |
| " <td>92</td>\n", | |
| " <td>49</td>\n", | |
| " <td>89</td>\n", | |
| " <td>9</td>\n", | |
| " <td>35</td>\n", | |
| " <td>51</td>\n", | |
| " <td>17</td>\n", | |
| " <td>45</td>\n", | |
| " <td>A</td>\n", | |
| " <td>C</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>20</td>\n", | |
| " <td>45</td>\n", | |
| " <td>54</td>\n", | |
| " <td>40</td>\n", | |
| " <td>78</td>\n", | |
| " <td>56</td>\n", | |
| " <td>53</td>\n", | |
| " <td>94</td>\n", | |
| " <td>45</td>\n", | |
| " <td>2</td>\n", | |
| " <td>A</td>\n", | |
| " <td>C</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>13</td>\n", | |
| " <td>17</td>\n", | |
| " <td>59</td>\n", | |
| " <td>72</td>\n", | |
| " <td>5</td>\n", | |
| " <td>63</td>\n", | |
| " <td>92</td>\n", | |
| " <td>53</td>\n", | |
| " <td>95</td>\n", | |
| " <td>1</td>\n", | |
| " <td>None</td>\n", | |
| " <td>E</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>88</td>\n", | |
| " <td>21</td>\n", | |
| " <td>32</td>\n", | |
| " <td>86</td>\n", | |
| " <td>3</td>\n", | |
| " <td>56</td>\n", | |
| " <td>84</td>\n", | |
| " <td>30</td>\n", | |
| " <td>19</td>\n", | |
| " <td>42</td>\n", | |
| " <td>B</td>\n", | |
| " <td>X</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>40</td>\n", | |
| " <td>81</td>\n", | |
| " <td>84</td>\n", | |
| " <td>21</td>\n", | |
| " <td>65</td>\n", | |
| " <td>30</td>\n", | |
| " <td>30</td>\n", | |
| " <td>18</td>\n", | |
| " <td>25</td>\n", | |
| " <td>22</td>\n", | |
| " <td>A</td>\n", | |
| " <td>C</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>48</td>\n", | |
| " <td>78</td>\n", | |
| " <td>84</td>\n", | |
| " <td>73</td>\n", | |
| " <td>75</td>\n", | |
| " <td>39</td>\n", | |
| " <td>86</td>\n", | |
| " <td>23</td>\n", | |
| " <td>52</td>\n", | |
| " <td>20</td>\n", | |
| " <td>A</td>\n", | |
| " <td>C</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " <td>True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>30</td>\n", | |
| " <td>24</td>\n", | |
| " <td>7</td>\n", | |
| " <td>81</td>\n", | |
| " <td>20</td>\n", | |
| " <td>35</td>\n", | |
| " <td>88</td>\n", | |
| " <td>86</td>\n", | |
| " <td>8</td>\n", | |
| " <td>46</td>\n", | |
| " <td>A</td>\n", | |
| " <td>C</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>False</td>\n", | |
| " <td>True</td>\n", | |
| " <td>False</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " col0 col1 col2 col3 col4 col5 col6 col7 col8 col9 colAB caseWhen \\\n", | |
| "0 43 75 78 91 37 68 51 62 14 98 A C \n", | |
| "1 77 59 73 32 3 95 38 31 66 82 B B \n", | |
| "2 99 47 82 61 49 67 50 48 19 22 B X \n", | |
| "3 5 39 92 49 89 9 35 51 17 45 A C \n", | |
| "4 20 45 54 40 78 56 53 94 45 2 A C \n", | |
| "5 13 17 59 72 5 63 92 53 95 1 None E \n", | |
| "6 88 21 32 86 3 56 84 30 19 42 B X \n", | |
| "7 40 81 84 21 65 30 30 18 25 22 A C \n", | |
| "8 48 78 84 73 75 39 86 23 52 20 A C \n", | |
| "9 30 24 7 81 20 35 88 86 8 46 A C \n", | |
| "\n", | |
| " col0_GT50 col1_GT48 col2_GT51 col3_GT49 col4_GT49 \n", | |
| "0 False True True True False \n", | |
| "1 True True True False False \n", | |
| "2 True False True True False \n", | |
| "3 False False True False True \n", | |
| "4 False False True False True \n", | |
| "5 False False True True False \n", | |
| "6 True False False True False \n", | |
| "7 False True True False True \n", | |
| "8 False True True True True \n", | |
| "9 False False False True False " | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.head(10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 2", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.9" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment