Created
June 30, 2017 03:44
-
-
Save DadgadCafe/27165ed2b3e18787dac8dd0c4daf960b to your computer and use it in GitHub Desktop.
Notes on NumPy and pandas.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
# --- Array creation ---------------------------------------------------------
arr = np.array([[1, 2, 3],
                [4, 5, 6]], dtype=np.int32)  # nested list -> 2-D array (matrix)
arr.ndim   # 2 dimensions
arr.shape  # (2, 3): 2 rows * 3 columns
arr.size   # 6: number of elements

np.zeros((3, 4), dtype=int)      # 3*4, filled with 0
np.ones((3, 4), dtype=np.int16)  # 3*4, filled with 1
np.empty((3, 4), dtype=int)      # 3*4, uninitialized (arbitrary values, not guaranteed to be 0)

a = np.arange(10, 20, 2)  # array([10, 12, 14, 16, 18])
a[0]  # 10

# 10 up to (but excluding) 50, step 2 -> 20 elements, reshaped to 4*5.
a = np.arange(10, 50, 2).reshape((4, 5))
a[0][0]    # 10
a[0, 0]    # same
a[0, 1:3]  # array([12, 14])

# linspace: 20 evenly spaced samples from 1 to 10 (both ends included), as 5*4.
np.linspace(1, 10, 20).reshape((5, 4))
# --- Matrix operations (arithmetic operators work element by element) --------
a1 = np.arange(5)       # array([0, 1, 2, 3, 4])
a2 = np.arange(10, 15)  # array([10, 11, 12, 13, 14])
a2 - a1   # array([10, 10, 10, 10, 10])
a2 + a1   # array([10, 12, 14, 16, 18])
a2 * a1   # array([ 0, 11, 24, 39, 56])
a1 / a2   # array([0., 0.09090909, 0.16666667, 0.23076923, 0.28571429])
a1 ** 2   # array([ 0,  1,  4,  9, 16])
a1 < 0    # array([False, False, False, False, False])
np.sin(a1)  # element-wise sine

# Matrix product (as opposed to the element-wise `*` above).
a = np.array([[1, 1], [0, 1]])
b = np.arange(4).reshape((2, 2))  # [[0, 1], [2, 3]]
np.dot(a, b)  # array([[2, 4], [2, 3]])
a.dot(b)      # same
np.dot([1, 2, 3], [4, 5, 6])  # 1*4 + 2*5 + 3*6 = 32
# --- Aggregations ------------------------------------------------------------
a = np.random.random((2, 4))  # 2*4 array of random floats in [0, 1)
np.sum(a)
np.max(a)
np.min(a)
np.sum(a, axis=0)  # sum by column (collapses the rows) -> 4 values
np.sum(a, axis=1)  # sum by row (collapses the columns) -> 2 values

A = np.arange(2, 14).reshape((3, 4))
# array([[ 2,  3,  4,  5],
#        [ 6,  7,  8,  9],
#        [10, 11, 12, 13]])
np.argmax(A)  # 11: flat index of the largest element (13)
np.argmin(A)  # 0: flat index of the smallest element (2)
np.mean(A)    # same as A.mean(): 7.5
np.median(A)  # 7.5 (ndarray has no .median() method; use np.median)
np.median(A, axis=0)  # median of each column: array([6., 7., 8., 9.])
np.average(A)  # 7.5
np.cumsum(A)   # [2, 2+3, 2+3+4, ...] => array([ 2,  5,  9, 14, 20, 27, 35, 44, 54, 65, 77, 90])
np.diff(A)     # [[3-2, 4-3, 5-4], ...] => array([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
np.nonzero(A)  # positions of the nonzero elements: (0, 0), (0, 1), ...
# (array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]),   <- row indices
#  array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]))   <- column indices
# --- Sorting, transposing, clipping -----------------------------------------
A = np.arange(14, 2, -1).reshape((3, 4))
# array([[14, 13, 12, 11],
#        [10,  9,  8,  7],
#        [ 6,  5,  4,  3]])

np.sort(A)  # sorts each row (last axis); returns a new array
# array([[11, 12, 13, 14],
#        [ 7,  8,  9, 10],
#        [ 3,  4,  5,  6]])

np.transpose(A)
A.T  # same
# array([[14, 10,  6],
#        [13,  9,  5],
#        [12,  8,  4],
#        [11,  7,  3]])

(A.T).dot(A)
# array([[332, 302, 272, 242],
#        [302, 275, 248, 221],
#        [272, 248, 224, 200],
#        [242, 221, 200, 179]])

np.clip(A, 5, 9)  # values > 9 become 9; values < 5 become 5
# array([[9, 9, 9, 9],
#        [9, 9, 8, 7],
#        [6, 5, 5, 5]])
# --- Indexing and iteration --------------------------------------------------
a = np.arange(4).reshape((2, 2))
a[0, 0]  # 0

# Iterating a 2-D array yields its rows.
for row in a:
    print(row)

# There is no direct column iterator; iterate the rows of the transpose.
for column in a.T:
    print(column)

a.flatten()  # array([0, 1, 2, 3]) -- a flattened copy

for item in a.flat:  # .flat iterates over every element
    print(item)
# --- Stacking and concatenating ---------------------------------------------
a = np.array([1, 1, 1])
b = np.array([2, 2, 2])
np.vstack((a, b))  # array([[1, 1, 1], [2, 2, 2]])
np.hstack((a, b))  # array([1, 1, 1, 2, 2, 2])
a[:, np.newaxis]   # array([[1], [1], [1]]) -- adds a second axis (column vector)

# using concatenate
# For 2-D inputs axis=0 joins vertically and axis=1 horizontally; these 1-D
# inputs only have axis 0, so they are joined end to end.
np.concatenate((a, b, b), axis=0)  # array([1, 1, 1, 2, 2, 2, 2, 2, 2])
# --- Splitting ---------------------------------------------------------------
a = np.arange(12).reshape((3, 4))

np.hsplit(a, 2)
np.split(a, 2, axis=1)  # same: split horizontally into 2 equal column groups
# [array([[0, 1], [4, 5], [8, 9]]),
#  array([[2, 3], [6, 7], [10, 11]])]

np.vsplit(a, 3)
np.split(a, 3, axis=0)  # same: split vertically into 3 equal row groups
# [array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[8, 9, 10, 11]])]

# np.split requires an even division; array_split allows uneven pieces
# (4 columns into 3 parts -> widths 2, 1, 1).
np.array_split(a, 3, axis=1)
# [array([[0, 1], [4, 5], [8, 9]]),
#  array([[2], [6], [10]]),
#  array([[3], [7], [11]])]
# --- Assignment vs. copy -----------------------------------------------------
a = np.arange(4)
b = a         # plain assignment binds a second name to the same array object
a is b        # True
c = a.copy()  # copy() creates a new, independent array
c is a        # False
# --- pandas ------------------------------------------------------------------
import pandas as pd

s = pd.Series([1, 3, np.nan, 5])
# 0    1.0
# 1    3.0
# 2    NaN
# 3    5.0
# dtype: float64

dates = pd.date_range('20170101', periods=6)
# DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05', '2017-01-06'],
#               dtype='datetime64[ns]', freq='D')

df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['a', 'b', 'c', 'd'])
# Sample output (values are random):
#                    a         b         c         d
# 2017-01-01 -0.669733  0.091818  0.581845 -0.290370
# 2017-01-02  0.203958 -0.840011 -1.234419  1.567374
# 2017-01-03  0.761231 -0.712473  0.954426  2.002349
# 2017-01-04  0.477278  0.860596  0.867349  0.438903
# 2017-01-05 -1.431947  0.684325 -0.762821  0.815071
# 2017-01-06 -0.095380 -0.515609  0.184032 -0.482174

# Without index/columns arguments, both default to 0..n-1.
pd.DataFrame(np.arange(12).reshape((3, 4)))
#    0  1   2   3
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11

# From a dict: keys become column names; scalars are broadcast to every row.
df2 = pd.DataFrame({'A': 1.,
                    'B': pd.Timestamp('20130102'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.arange(4),
                    'E': pd.Categorical(['test', 'train', 'test', 'train']),
                    'F': 'foo'})
#      A          B    C  D      E    F
# 0  1.0 2013-01-02  1.0  0   test  foo
# 1  1.0 2013-01-02  1.0  1  train  foo
# 2  1.0 2013-01-02  1.0  2   test  foo
# 3  1.0 2013-01-02  1.0  3  train  foo

df2.dtypes      # dtype of each column (DataFrame has no `.type` attribute)
df2.index       # row labels
df2.columns     # column labels
df2.shape       # (4, 6): number of rows and columns (DataFrame has no `.rows` attribute)
df2.values      # underlying data as a NumPy array
df2.describe()  # summary statistics
df2.T           # transpose
df2.sort_index(axis=1, ascending=False)  # sort by column name, descending
df2.sort_values(by='B')                  # sort rows by the values in column B
# --- Data selection ----------------------------------------------------------
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)),
                  index=dates,
                  columns=['A', 'B', 'C', 'D'])
'''
             A   B   C   D
2013-01-01   0   1   2   3
2013-01-02   4   5   6   7
2013-01-03   8   9  10  11
2013-01-04  12  13  14  15
2013-01-05  16  17  18  19
2013-01-06  20  21  22  23
'''

# Select one column (two equivalent spellings).
df['A']
df.A
'''
2013-01-01     0
2013-01-02     4
2013-01-03     8
2013-01-04    12
2013-01-05    16
2013-01-06    20
Freq: D, Name: A, dtype: int64
'''

# Slice rows by position.
df[0:3]
'''
            A  B   C   D
2013-01-01  0  1   2   3
2013-01-02  4  5   6   7
2013-01-03  8  9  10  11
'''

# Slice rows by label; the end label is included.
df['20130102':'20130104']
'''
             A   B   C   D
2013-01-02   4   5   6   7
2013-01-03   8   9  10  11
2013-01-04  12  13  14  15
'''

# loc: select by label.
df.loc['20130102']
'''
A    4
B    5
C    6
D    7
Name: 2013-01-02 00:00:00, dtype: int64
'''

df.loc[:, ['A', 'B']]
'''
             A   B
2013-01-01   0   1
2013-01-02   4   5
2013-01-03   8   9
2013-01-04  12  13
2013-01-05  16  17
2013-01-06  20  21
'''

df.loc['20130102', ['A', 'B']]
'''
A    4
B    5
Name: 2013-01-02 00:00:00, dtype: int64
'''

# iloc: select by integer position.
df.iloc[3, 1]  # 13
df.iloc[3:5, 1:3]
'''
             B   C
2013-01-04  13  14
2013-01-05  17  18
'''

df.iloc[[1, 3, 5], 1:3]
'''
             B   C
2013-01-02   5   6
2013-01-04  13  14
2013-01-06  21  22
'''

# Mixed selection (rows by position, columns by label). The old `.ix` indexer
# no longer exists in pandas, so chain iloc and loc instead.
df.iloc[:3].loc[:, ['A', 'C']]
'''
            A   C
2013-01-01  0   2
2013-01-02  4   6
2013-01-03  8  10
'''

# Boolean indexing:
df[df.A > 8]
'''
             A   B   C   D
2013-01-04  12  13  14  15
2013-01-05  16  17  18  19
2013-01-06  20  21  22  23
'''
# --- Setting values ----------------------------------------------------------
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)),
                  index=dates,
                  columns=['A', 'B', 'C', 'D'])

df.iloc[2, 2] = 111            # by position
df.loc['20130101', 'B'] = 222  # by label
'''
             A    B    C   D
2013-01-01   0  222    2   3
2013-01-02   4    5    6   7
2013-01-03   8    9  111  11
2013-01-04  12   13   14  15
2013-01-05  16   17   18  19
2013-01-06  20   21   22  23
'''

# Conditional assignment. Use a single .loc call: the chained form
# `df.B[df.A > 4] = 0` may write to a temporary copy instead of df.
df.loc[df.A > 4, 'B'] = 0
'''
             A    B    C   D
2013-01-01   0  222    2   3
2013-01-02   4    5    6   7
2013-01-03   8    0  111  11
2013-01-04  12    0   14  15
2013-01-05  16    0   18  19
2013-01-06  20    0   22  23
'''

# Add a new column filled with a scalar.
df['F'] = np.nan
'''
             A    B    C   D   F
2013-01-01   0  222    2   3 NaN
2013-01-02   4    5    6   7 NaN
2013-01-03   8    0  111  11 NaN
2013-01-04  12    0   14  15 NaN
2013-01-05  16    0   18  19 NaN
2013-01-06  20    0   22  23 NaN
'''

# Add a new column from a Series; values are aligned on the index.
df['E'] = pd.Series([1, 2, 3, 4, 5, 6],
                    index=pd.date_range('20130101', periods=6))
'''
             A    B    C   D   F  E
2013-01-01   0  222    2   3 NaN  1
2013-01-02   4    5    6   7 NaN  2
2013-01-03   8    0  111  11 NaN  3
2013-01-04  12    0   14  15 NaN  4
2013-01-05  16    0   18  19 NaN  5
2013-01-06  20    0   22  23 NaN  6
'''
# --- Handling missing data (NaN) ---------------------------------------------
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)),
                  index=dates,
                  columns=['A', 'B', 'C', 'D'])
# NaN is a float, so make the target columns float explicitly instead of
# relying on an implicit int -> float upcast during assignment.
df = df.astype({'B': float, 'C': float})
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
'''
             A     B     C   D
2013-01-01   0   NaN   2.0   3
2013-01-02   4   5.0   NaN   7
2013-01-03   8   9.0  10.0  11
2013-01-04  12  13.0  14.0  15
2013-01-05  16  17.0  18.0  19
2013-01-06  20  21.0  22.0  23
'''

df.dropna(
    axis=0,     # 0: drop rows; 1: drop columns
    how='any',  # 'any': drop if any value is NaN; 'all': drop only if every value is NaN
)
'''
             A     B     C   D
2013-01-03   8   9.0  10.0  11
2013-01-04  12  13.0  14.0  15
2013-01-05  16  17.0  18.0  19
2013-01-06  20  21.0  22.0  23
'''

df.fillna(value=0)
'''
             A     B     C   D
2013-01-01   0   0.0   2.0   3
2013-01-02   4   5.0   0.0   7
2013-01-03   8   9.0  10.0  11
2013-01-04  12  13.0  14.0  15
2013-01-05  16  17.0  18.0  19
2013-01-06  20  21.0  22.0  23
'''

df.isnull()
'''
                A      B      C      D
2013-01-01  False   True  False  False
2013-01-02  False  False   True  False
2013-01-03  False  False  False  False
2013-01-04  False  False  False  False
2013-01-05  False  False  False  False
2013-01-06  False  False  False  False
'''

df.isnull().values.any()  # True if the frame contains at least one NaN
# --- Reading and writing files -----------------------------------------------
# read: load a CSV file into a DataFrame
data = pd.read_csv('students.csv')
# to pickle: serialize the DataFrame to disk
data.to_pickle('student.pickle')
# --- concat ------------------------------------------------------------------
# axis, default 0 (stack the frames on top of each other)
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])

pd.concat([df1, df2, df3], axis=0)  # the original row labels are kept
'''
     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
0  1.0  1.0  1.0  1.0
1  1.0  1.0  1.0  1.0
2  1.0  1.0  1.0  1.0
0  2.0  2.0  2.0  2.0
1  2.0  2.0  2.0  2.0
2  2.0  2.0  2.0  2.0
'''

pd.concat([df1, df2, df3], axis=0, ignore_index=True)  # renumber rows 0..n-1
'''
     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
3  1.0  1.0  1.0  1.0
4  1.0  1.0  1.0  1.0
5  1.0  1.0  1.0  1.0
6  2.0  2.0  2.0  2.0
7  2.0  2.0  2.0  2.0
8  2.0  2.0  2.0  2.0
'''

# Frames whose columns only partly overlap.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])

pd.concat([df1, df2], axis=0, join='outer')  # union of columns, gaps filled with NaN
'''
     a    b    c    d    e
1  0.0  0.0  0.0  0.0  NaN
2  0.0  0.0  0.0  0.0  NaN
3  0.0  0.0  0.0  0.0  NaN
2  NaN  1.0  1.0  1.0  1.0
3  NaN  1.0  1.0  1.0  1.0
4  NaN  1.0  1.0  1.0  1.0
'''

pd.concat([df1, df2], axis=0, join='inner')  # only the columns present in both
'''
     b    c    d
1  0.0  0.0  0.0
2  0.0  0.0  0.0
3  0.0  0.0  0.0
2  1.0  1.0  1.0
3  1.0  1.0  1.0
4  1.0  1.0  1.0
'''

pd.concat([df1, df2], axis=0, join='inner', ignore_index=True)
'''
     b    c    d
0  0.0  0.0  0.0
1  0.0  0.0  0.0
2  0.0  0.0  0.0
3  1.0  1.0  1.0
4  1.0  1.0  1.0
5  1.0  1.0  1.0
'''
# horizontal by index
df1 = pd.DataFrame(np.ones((3, 4)),
                   columns=['A', 'B', 'C', 'D'],
                   index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3, 4)),
                   columns=['A', 'B', 'C', 'D'],
                   index=[2, 3, 4])
# Join side by side but keep only df1's row labels. The `join_axes` argument
# no longer exists in pandas; reindexing the result gives the same rows.
pd.concat([df1, df2], axis=1).reindex(df1.index)
#      A    B    C    D    A    B    C    D
# 1  1.0  1.0  1.0  1.0  NaN  NaN  NaN  NaN
# 2  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0
# 3  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0
# --- Appending rows ----------------------------------------------------------
df1 = pd.DataFrame(np.ones((3, 4)) * 0,
                   columns=['A', 'B', 'C', 'D'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1,
                   columns=['A', 'B', 'C', 'D'])
df3 = pd.DataFrame(np.ones((3, 4)) * 1,
                   columns=['A', 'B', 'C', 'D'])

# DataFrame.append no longer exists in pandas; pd.concat is the replacement.
pd.concat([df1, df2], ignore_index=True)
'''
     A    B    C    D
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
3  1.0  1.0  1.0  1.0
4  1.0  1.0  1.0  1.0
5  1.0  1.0  1.0  1.0
'''

# Append a single row. The Series index must match the frame's column labels
# (they are case-sensitive); it is turned into a one-row frame before concat.
s1 = pd.Series([1, 2, 3, 4],
               index=['A', 'B', 'C', 'D'])
pd.concat([df1, s1.to_frame().T], ignore_index=True)
'''
     A    B    C    D
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
3  1.0  2.0  3.0  4.0
'''
# --- merge: join two frames on a key column ----------------------------------
left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key': ['K1', 'K2', 'K3', 'K4'],
                      'C': ['C0', 'C1', 'C2', 'C3'],
                      'D': ['D0', 'D1', 'D2', 'D3']})
'''
left:
  key   A   B
0  K0  A0  B0
1  K1  A1  B1
2  K2  A2  B2
3  K3  A3  B3
'''
'''
right:
  key   C   D
0  K1  C0  D0
1  K2  C1  D1
2  K3  C2  D2
3  K4  C3  D3
'''

# The default is an inner join: only keys present in both frames survive.
pd.merge(left, right, on='key')
'''
  key   A   B   C   D
0  K1  A1  B1  C0  D0
1  K2  A2  B2  C1  D1
2  K3  A3  B3  C2  D2
'''
# --- merge on two key columns ------------------------------------------------
left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
                     'key2': ['K0', 'K1', 'K0', 'K1'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
                      'key2': ['K0', 'K0', 'K0', 'K0'],
                      'C': ['C0', 'C1', 'C2', 'C3'],
                      'D': ['D0', 'D1', 'D2', 'D3']})

# A row matches only when both key1 and key2 agree. (K1, K0) occurs twice in
# `right`, so the matching left row is repeated.
pd.merge(left, right, on=['key1', 'key2'], how='inner')  # 3 rows: pairs found in both
pd.merge(left, right, on=['key1', 'key2'], how='outer')  # 6 rows: pairs found in either
pd.merge(left, right, on=['key1', 'key2'], how='left')   # 5 rows: every pair from left
pd.merge(left, right, on=['key1', 'key2'], how='right')  # 4 rows: every pair from right
# --- merge with an indicator column ------------------------------------------
df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})

# indicator=True adds a `_merge` column telling where each row came from.
pd.merge(df1, df2, on='col1', how='outer', indicator=True)
'''
   col1 col_left  col_right      _merge
0     0        a        NaN   left_only
1     1        b        2.0        both
2     2      NaN        2.0  right_only
3     2      NaN        2.0  right_only
'''

# Passing a string names the indicator column.
pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column')
'''
   col1 col_left  col_right indicator_column
0     0        a        NaN        left_only
1     1        b        2.0             both
2     2      NaN        2.0       right_only
3     2      NaN        2.0       right_only
'''
# merge by index (use the row labels as the join key instead of a column)
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                     'B': ['B0', 'B1', 'B2']},
                    index=['K0', 'K1', 'K2'])
right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],
                      'D': ['D0', 'D2', 'D3']},
                     index=['K0', 'K2', 'K3'])

pd.merge(left, right, left_index=True, right_index=True, how='outer')
#       A    B    C    D
# K0   A0   B0   C0   D0
# K1   A1   B1  NaN  NaN
# K2   A2   B2   C2   D2
# K3  NaN  NaN   C3   D3

pd.merge(left, right, left_index=True, right_index=True, how='inner')
#      A   B   C   D
# K0  A0  B0  C0  D0
# K2  A2  B2  C2  D2
# --- merge with overlapping column names -------------------------------------
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})

# Both frames have an `age` column; `suffixes` resolves the overlap by
# renaming them to age_boy / age_girl in the result.
pd.merge(boys, girls, on='k', suffixes=('_boy', '_girl'), how='inner')
'''
    k  age_boy  age_girl
0  K0        1         4
1  K0        1         5
'''
# draw
import matplotlib.pyplot as plt

data = pd.Series(np.random.randn(1000), index=np.arange(1000))
# cumsum() returns a new Series rather than modifying `data`, so the result
# must be assigned back for the plot to show the accumulated values.
data = data.cumsum()
data.plot()
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment