Last active
February 7, 2020 22:10
-
-
Save rschroll/c51449f5554e317a62fc83dd336673d5 to your computer and use it in GitHub Desktop.
Pandas Pipeline
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| from inspect import isroutine, signature | |
| from functools import wraps | |
| __all__ = ['pipeline'] | |
# Keep a private handle on the builtin ``all``: the pandas method wrappers
# generated at the bottom of this module rebind the module-level name ``all``.
# Importing from ``builtins`` always yields the genuine builtin, whether the
# module is loaded for the first time, reloaded, or executed in a namespace
# where ``all`` has already been shadowed.
from builtins import all as _all
def is_pandas(obj):
    """Return True when *obj* is an instance of pandas' ``PandasObject`` base."""
    pandas_base = pd.core.base.PandasObject
    return isinstance(obj, pandas_base)
class PandasPipeline:
    """Deferred call that runs once a pandas object is piped into it with ``>>``.

    An instance stores a callable together with extra positional and keyword
    arguments; evaluating ``obj >> instance`` returns
    ``func(obj, *args, **kw)``.
    """

    def __init__(self, func, *args, **kw):
        # Everything except the first argument of ``func`` is captured here;
        # the first argument is the left-hand operand of ``>>``.
        self.func, self.args, self.kw = func, args, kw

    def __rrshift__(self, df):
        """Handle ``df >> self`` by calling the stored function on *df*."""
        if not is_pandas(df):
            # Not something we handle: defer to Python's normal operator
            # resolution.
            return NotImplemented
        return self.func(df, *self.args, **self.kw)
def pipeline(n_df=1):
    """Decorator that makes a function usable as a ``>>`` pipeline step.

    It can be applied bare (``@pipeline``) or called (``@pipeline()``,
    ``@pipeline(n_df=2)``).

    Each call of the decorated function goes one of two ways:

    * If at least ``n_df`` positional arguments are given, the first ``n_df``
      are all pandas objects, and the arguments bind to the function's
      signature, the function is called immediately and its result returned.
    * Otherwise a ``PandasPipeline`` holding the function and the supplied
      arguments is returned, to be evaluated later by ``obj >> step``.

    Args:
        n_df: Number of leading positional arguments that must be pandas
            objects for a call to be treated as a direct call.  When a
            callable is passed instead (bare ``@pipeline`` usage) it is
            decorated with the default ``n_df=1``.

    Returns:
        The decorated function when used bare, otherwise a decorator.
    """
    if callable(n_df):
        # Bare ``@pipeline``: the "argument" is really the function to wrap.
        # ``callable`` (rather than ``inspect.isroutine``) also recognises
        # functools.partial objects and instances defining ``__call__``.
        return pipeline()(n_df)

    def decorator(func):
        @wraps(func)
        def decorated(*args, **kw):
            if len(args) >= n_df and _all(is_pandas(arg) for arg in args[:n_df]):
                try:
                    # Only test whether the arguments fit; catching a
                    # TypeError from the real call could mask errors raised
                    # inside ``func`` itself.
                    signature(func).bind(*args, **kw)
                except TypeError:
                    # Didn't have the right number of arguments, so the
                    # leading pandas object(s) must be extra arguments of a
                    # pipeline step, e.g. ``df >> add_n(other_df)``.
                    pass
                else:
                    return func(*args, **kw)
            return PandasPipeline(func, *args, **kw)
        return decorated

    return decorator
def _load_methods():
    """Export every lowercase-named ``pd.DataFrame`` attribute as a pipeline step.

    For each name in ``dir(pd.DataFrame)`` whose first character is a
    lowercase ASCII letter, a module-level function of the same name is
    created and the name is appended to ``__all__``.  Calling that function
    returns a ``PandasPipeline``, so ``df >> name(*args, **kw)`` evaluates
    ``pd.DataFrame.name(df, *args, **kw)``.

    Attributes that are not callable on the class (properties and similar
    descriptors) cannot be invoked as ``attr(df)``; for those the step reads
    the attribute from the piped object instead, so ``df >> name()`` returns
    ``df.name``.
    """
    def method_pipeline(func):
        @wraps(func)
        def decorated(*args, **kw):
            return PandasPipeline(func, *args, **kw)
        return decorated

    def attribute_getter(name, attr):
        # Stand-in for a non-callable class attribute: fetch it from the
        # instance that is piped in.
        def getter(df):
            return getattr(df, name)
        getter.__name__ = getter.__qualname__ = name
        getter.__doc__ = getattr(attr, '__doc__', None)
        return getter

    global_dict = globals()
    for name in dir(pd.DataFrame):
        if not 'a' <= name[0] <= 'z':
            # Skip private/dunder names and capitalised attributes.
            continue
        attr = getattr(pd.DataFrame, name)
        func = attr if callable(attr) else attribute_getter(name, attr)
        global_dict[name] = method_pipeline(func)
        __all__.append(name)


_load_methods()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true, | |
| "slideshow": { | |
| "slide_type": "skip" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "from numpy import nan\n", | |
| "\n", | |
| "def is_pandas(obj):\n", | |
| " return isinstance(obj, pd.core.base.PandasObject)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "# Pandas Pipeline" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "- Elegant syntax for data analysis" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "- Doing naughty things to Python internals" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "```python\n", | |
| "df.replace(-1, nan).to_dict()\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "```python\n", | |
| "df.replace(-1, nan).my_func().to_dict()\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "```python\n", | |
| "my_func(df.replace(-1, nan)).to_dict()\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "```python\n", | |
| "df.replace(-1, nan).pipe(my_func).to_dict()\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "R has pipelines\n", | |
| "\n", | |
| "```r\n", | |
| "df %>% replace(-1, nan) %>% my_func %>% to_dict()\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "Python has the *right-shift* operator" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "4 >> 1" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "And Pandas doesn't use it" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "TypeError", | |
| "evalue": "unsupported operand type(s) for >>: 'DataFrame' and 'int'", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-3-d687ee8f1bd0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'b'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m>>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for >>: 'DataFrame' and 'int'" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "df = pd.DataFrame([{'a': 1, 'b': -1}])\n", | |
| "df >> 1" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "Let's use this: `df >> my_func`" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": true, | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "class PandasPipeline():\n", | |
| " def __init__(self, func):\n", | |
| " self.func = func\n", | |
| " \n", | |
| " def __rrshift__(self, df):\n", | |
| " if is_pandas(df):\n", | |
| " return self.func(df)\n", | |
| " return NotImplemented" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a b\n", | |
| "0 2 0" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "def my_func(df):\n", | |
| " return df + 1\n", | |
| "\n", | |
| "my_func_pipe = PandasPipeline(my_func)\n", | |
| "\n", | |
| "df >> my_func_pipe" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "**Fun Python Fact:** *Anything* can be a decorator, if it's callable" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a b\n", | |
| "0 2 0" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "@PandasPipeline\n", | |
| "def my_func(df):\n", | |
| " return df + 1\n", | |
| "\n", | |
| "df >> my_func" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "Some people like functions with more than one argument" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "*Fine...*" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": true, | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def pipeline(func):\n", | |
| " def decorated(*args, **kw):\n", | |
| " return PandasPipeline(func, *args, **kw)\n", | |
| " return decorated\n", | |
| "\n", | |
| "class PandasPipeline():\n", | |
| " def __init__(self, func, *args, **kw):\n", | |
| " self.func = func\n", | |
| " self.args = args\n", | |
| " self.kw = kw\n", | |
| " \n", | |
| " def __rrshift__(self, df):\n", | |
| " if is_pandas(df):\n", | |
| " return self.func(df, *self.args, **self.kw)\n", | |
| " return NotImplemented" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a b\n", | |
| "0 2 0" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "@pipeline\n", | |
| "def add_n(df, n):\n", | |
| " return df + n\n", | |
| "\n", | |
| "df >> add_n(1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "But you know someone will do this:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<__main__.PandasPipeline at 0x7ff34a7c13d0>" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "add_n(df, 1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "*Fine...*" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": true, | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def pipeline(n_df=1):\n", | |
| " def decorator(func):\n", | |
| " def decorated(*args, **kw):\n", | |
| " if (len(args) >= n_df and \n", | |
| " all(is_pandas(arg) for arg in args[:n_df])):\n", | |
| " return func(*args, **kw)\n", | |
| " return PandasPipeline(func, *args, **kw)\n", | |
| " return decorated\n", | |
| " return decorator" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": { | |
| "collapsed": true, | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "@pipeline()\n", | |
| "def add_n(df, n):\n", | |
| " return df + n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a b\n", | |
| "0 2 0" | |
| ] | |
| }, | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df >> add_n(1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a b\n", | |
| "0 2 0" | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "add_n(df, 1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "But this works..." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2</td>\n", | |
| " <td>-2</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a b\n", | |
| "0 2 -2" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "add_n(df, df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "...while this doesn't" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "TypeError", | |
| "evalue": "add_n() missing 1 required positional argument: 'n'", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-15-c1586a3c0af2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m>>\u001b[0m \u001b[0madd_n\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;32m<ipython-input-10-abd9dd81dad0>\u001b[0m in \u001b[0;36mdecorated\u001b[0;34m(*args, **kw)\u001b[0m\n\u001b[1;32m 4\u001b[0m if (len(args) >= n_df and \n\u001b[1;32m 5\u001b[0m all(is_pandas(arg) for arg in args[:n_df])):\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mPandasPipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdecorated\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mTypeError\u001b[0m: add_n() missing 1 required positional argument: 'n'" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "df >> add_n(df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "*Fine...*" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "We could catch the `TypeError`, but that could be raised by many other things. We just want to test the binding of arguments." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<BoundArguments (df= a b\n", | |
| "0 1 -1, n= a b\n", | |
| "0 1 -1)>" | |
| ] | |
| }, | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from inspect import signature\n", | |
| "\n", | |
| "def add_n(df, n):\n", | |
| " return df + n\n", | |
| "\n", | |
| "signature(add_n).bind(df, df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "TypeError", | |
| "evalue": "missing a required argument: 'n'", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-17-05a6cc18d925>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msignature\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0madd_n\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;32m~/miniconda2/envs/data3/lib/python3.7/inspect.py\u001b[0m in \u001b[0;36mbind\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 3013\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mpassed\u001b[0m \u001b[0marguments\u001b[0m \u001b[0mcan\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mbe\u001b[0m \u001b[0mbound\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3014\u001b[0m \"\"\"\n\u001b[0;32m-> 3015\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_bind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3016\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3017\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mbind_partial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m~/miniconda2/envs/data3/lib/python3.7/inspect.py\u001b[0m in \u001b[0;36m_bind\u001b[0;34m(self, args, kwargs, partial)\u001b[0m\n\u001b[1;32m 2928\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'missing a required argument: {arg!r}'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2929\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparam\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2930\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2931\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2932\u001b[0m \u001b[0;31m# We have a positional argument to process\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mTypeError\u001b[0m: missing a required argument: 'n'" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "signature(add_n).bind(df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": { | |
| "collapsed": true, | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def pipeline(n_df=1):\n", | |
| " def decorator(func):\n", | |
| " def decorated(*args, **kw):\n", | |
| " if (len(args) >= n_df and \n", | |
| " all(is_pandas(arg) for arg in args[:n_df])):\n", | |
| " try:\n", | |
| " signature(func).bind(*args, **kw)\n", | |
| " except TypeError:\n", | |
| " pass\n", | |
| " else:\n", | |
| " return func(*args, **kw)\n", | |
| " return PandasPipeline(func, *args, **kw)\n", | |
| " return decorated\n", | |
| " return decorator" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "@pipeline()\n", | |
| "def add_n(df, n):\n", | |
| " return df + n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2</td>\n", | |
| " <td>-2</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a b\n", | |
| "0 2 -2" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df >> add_n(df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "But you know someone's going to do this:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "TypeError", | |
| "evalue": "unsupported operand type(s) for >>: 'DataFrame' and 'function'", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-21-ebed7a696ae3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m>>\u001b[0m \u001b[0madd_n\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for >>: 'DataFrame' and 'function'" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "@pipeline\n", | |
| "def add_n(df, n):\n", | |
| " return df + n\n", | |
| "\n", | |
| "df >> add_n(1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "I did this while writing *this presentation*!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "*Fine...*" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "metadata": { | |
| "collapsed": true, | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from inspect import isroutine\n", | |
| "\n", | |
| "def pipeline(n_df=1):\n", | |
| " def decorator(func):\n", | |
| " def decorated(*args, **kw):\n", | |
| " if (len(args) >= n_df and \n", | |
| " all(is_pandas(arg) for arg in args[:n_df])):\n", | |
| " try:\n", | |
| " signature(func).bind(*args, **kw)\n", | |
| " except TypeError:\n", | |
| " pass\n", | |
| " else:\n", | |
| " return func(*args, **kw)\n", | |
| " return PandasPipeline(func, *args, **kw)\n", | |
| " return decorated\n", | |
| " \n", | |
| " if isroutine(n_df):\n", | |
| " func = n_df\n", | |
| " n_df = 1\n", | |
| " return decorator(func)\n", | |
| " \n", | |
| " return decorator" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a b\n", | |
| "0 2 0" | |
| ] | |
| }, | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "@pipeline\n", | |
| "def add_n(df, n):\n", | |
| " return df + n\n", | |
| "\n", | |
| "df >> add_n(1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 24, | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "-" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a b\n", | |
| "0 2 0" | |
| ] | |
| }, | |
| "execution_count": 24, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "@pipeline()\n", | |
| "def add_n(df, n):\n", | |
| " return df + n\n", | |
| "\n", | |
| "df >> add_n(1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "Finally, we get that beautiful syntax:\n", | |
| "\n", | |
| "```python\n", | |
| "(df.replace(-1, nan) >> my_func()).to_dict()\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "source": [ | |
| "No, that's even worse. I want this:\n", | |
| "\n", | |
| "```python\n", | |
| "df >> replace(-1, nan) >> my_func() >> to_dict()\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "*Fine...*" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['T',\n", | |
| " '_AXIS_ALIASES',\n", | |
| " '_AXIS_IALIASES',\n", | |
| " '_AXIS_LEN',\n", | |
| " '_AXIS_NAMES',\n", | |
| " '_AXIS_NUMBERS',\n", | |
| " '_AXIS_ORDERS',\n", | |
| " '_AXIS_REVERSED',\n", | |
| " '__abs__',\n", | |
| " '__add__',\n", | |
| " '__and__',\n", | |
| " '__array__',\n", | |
| " '__array_priority__',\n", | |
| " '__array_wrap__',\n", | |
| " '__bool__',\n", | |
| " '__class__',\n", | |
| " '__contains__',\n", | |
| " '__copy__',\n", | |
| " '__deepcopy__',\n", | |
| " '__delattr__',\n", | |
| " '__delitem__',\n", | |
| " '__dict__',\n", | |
| " '__dir__',\n", | |
| " '__div__',\n", | |
| " '__doc__',\n", | |
| " '__eq__',\n", | |
| " '__finalize__',\n", | |
| " '__floordiv__',\n", | |
| " '__format__',\n", | |
| " '__ge__',\n", | |
| " '__getattr__',\n", | |
| " '__getattribute__',\n", | |
| " '__getitem__',\n", | |
| " '__getstate__',\n", | |
| " '__gt__',\n", | |
| " '__hash__',\n", | |
| " '__iadd__',\n", | |
| " '__iand__',\n", | |
| " '__ifloordiv__',\n", | |
| " '__imod__',\n", | |
| " '__imul__',\n", | |
| " '__init__',\n", | |
| " '__init_subclass__',\n", | |
| " '__invert__',\n", | |
| " '__ior__',\n", | |
| " '__ipow__',\n", | |
| " '__isub__',\n", | |
| " '__iter__',\n", | |
| " '__itruediv__',\n", | |
| " '__ixor__',\n", | |
| " '__le__',\n", | |
| " '__len__',\n", | |
| " '__lt__',\n", | |
| " '__matmul__',\n", | |
| " '__mod__',\n", | |
| " '__module__',\n", | |
| " '__mul__',\n", | |
| " '__ne__',\n", | |
| " '__neg__',\n", | |
| " '__new__',\n", | |
| " '__nonzero__',\n", | |
| " '__or__',\n", | |
| " '__pos__',\n", | |
| " '__pow__',\n", | |
| " '__radd__',\n", | |
| " '__rand__',\n", | |
| " '__rdiv__',\n", | |
| " '__reduce__',\n", | |
| " '__reduce_ex__',\n", | |
| " '__repr__',\n", | |
| " '__rfloordiv__',\n", | |
| " '__rmatmul__',\n", | |
| " '__rmod__',\n", | |
| " '__rmul__',\n", | |
| " '__ror__',\n", | |
| " '__round__',\n", | |
| " '__rpow__',\n", | |
| " '__rsub__',\n", | |
| " '__rtruediv__',\n", | |
| " '__rxor__',\n", | |
| " '__setattr__',\n", | |
| " '__setitem__',\n", | |
| " '__setstate__',\n", | |
| " '__sizeof__',\n", | |
| " '__str__',\n", | |
| " '__sub__',\n", | |
| " '__subclasshook__',\n", | |
| " '__truediv__',\n", | |
| " '__weakref__',\n", | |
| " '__xor__',\n", | |
| " '_accessors',\n", | |
| " '_add_numeric_operations',\n", | |
| " '_add_series_only_operations',\n", | |
| " '_add_series_or_dataframe_operations',\n", | |
| " '_agg_by_level',\n", | |
| " '_agg_examples_doc',\n", | |
| " '_agg_summary_and_see_also_doc',\n", | |
| " '_aggregate',\n", | |
| " '_aggregate_multiple_funcs',\n", | |
| " '_align_frame',\n", | |
| " '_align_series',\n", | |
| " '_box_col_values',\n", | |
| " '_box_item_values',\n", | |
| " '_builtin_table',\n", | |
| " '_check_inplace_setting',\n", | |
| " '_check_is_chained_assignment_possible',\n", | |
| " '_check_label_or_level_ambiguity',\n", | |
| " '_check_percentile',\n", | |
| " '_check_setitem_copy',\n", | |
| " '_clear_item_cache',\n", | |
| " '_clip_with_one_bound',\n", | |
| " '_clip_with_scalar',\n", | |
| " '_combine_const',\n", | |
| " '_combine_frame',\n", | |
| " '_combine_match_columns',\n", | |
| " '_combine_match_index',\n", | |
| " '_consolidate',\n", | |
| " '_consolidate_inplace',\n", | |
| " '_construct_axes_dict',\n", | |
| " '_construct_axes_dict_from',\n", | |
| " '_construct_axes_from_arguments',\n", | |
| " '_constructor',\n", | |
| " '_constructor_expanddim',\n", | |
| " '_constructor_sliced',\n", | |
| " '_convert',\n", | |
| " '_count_level',\n", | |
| " '_create_indexer',\n", | |
| " '_cython_table',\n", | |
| " '_data',\n", | |
| " '_deprecations',\n", | |
| " '_dir_additions',\n", | |
| " '_dir_deletions',\n", | |
| " '_drop_axis',\n", | |
| " '_drop_labels_or_levels',\n", | |
| " '_ensure_valid_index',\n", | |
| " '_find_valid_index',\n", | |
| " '_from_arrays',\n", | |
| " '_from_axes',\n", | |
| " '_get_agg_axis',\n", | |
| " '_get_axis',\n", | |
| " '_get_axis_name',\n", | |
| " '_get_axis_number',\n", | |
| " '_get_axis_resolvers',\n", | |
| " '_get_block_manager_axis',\n", | |
| " '_get_bool_data',\n", | |
| " '_get_cacher',\n", | |
| " '_get_index_resolvers',\n", | |
| " '_get_item_cache',\n", | |
| " '_get_label_or_level_values',\n", | |
| " '_get_numeric_data',\n", | |
| " '_get_space_character_free_column_resolvers',\n", | |
| " '_get_value',\n", | |
| " '_get_values',\n", | |
| " '_getitem_bool_array',\n", | |
| " '_getitem_frame',\n", | |
| " '_getitem_multilevel',\n", | |
| " '_gotitem',\n", | |
| " '_iget_item_cache',\n", | |
| " '_indexed_same',\n", | |
| " '_info_axis',\n", | |
| " '_info_axis_name',\n", | |
| " '_info_axis_number',\n", | |
| " '_info_repr',\n", | |
| " '_init_mgr',\n", | |
| " '_internal_get_values',\n", | |
| " '_internal_names',\n", | |
| " '_internal_names_set',\n", | |
| " '_is_builtin_func',\n", | |
| " '_is_cached',\n", | |
| " '_is_copy',\n", | |
| " '_is_cython_func',\n", | |
| " '_is_datelike_mixed_type',\n", | |
| " '_is_homogeneous_type',\n", | |
| " '_is_label_or_level_reference',\n", | |
| " '_is_label_reference',\n", | |
| " '_is_level_reference',\n", | |
| " '_is_mixed_type',\n", | |
| " '_is_numeric_mixed_type',\n", | |
| " '_is_view',\n", | |
| " '_ix',\n", | |
| " '_ixs',\n", | |
| " '_join_compat',\n", | |
| " '_maybe_cache_changed',\n", | |
| " '_maybe_update_cacher',\n", | |
| " '_metadata',\n", | |
| " '_needs_reindex_multi',\n", | |
| " '_obj_with_exclusions',\n", | |
| " '_protect_consolidate',\n", | |
| " '_reduce',\n", | |
| " '_reindex_axes',\n", | |
| " '_reindex_columns',\n", | |
| " '_reindex_index',\n", | |
| " '_reindex_multi',\n", | |
| " '_reindex_with_indexers',\n", | |
| " '_repr_data_resource_',\n", | |
| " '_repr_fits_horizontal_',\n", | |
| " '_repr_fits_vertical_',\n", | |
| " '_repr_html_',\n", | |
| " '_repr_latex_',\n", | |
| " '_reset_cache',\n", | |
| " '_reset_cacher',\n", | |
| " '_sanitize_column',\n", | |
| " '_selected_obj',\n", | |
| " '_selection',\n", | |
| " '_selection_list',\n", | |
| " '_selection_name',\n", | |
| " '_series',\n", | |
| " '_set_as_cached',\n", | |
| " '_set_axis',\n", | |
| " '_set_axis_name',\n", | |
| " '_set_is_copy',\n", | |
| " '_set_item',\n", | |
| " '_set_value',\n", | |
| " '_setitem_array',\n", | |
| " '_setitem_frame',\n", | |
| " '_setitem_slice',\n", | |
| " '_setup_axes',\n", | |
| " '_shallow_copy',\n", | |
| " '_slice',\n", | |
| " '_stat_axis',\n", | |
| " '_stat_axis_name',\n", | |
| " '_stat_axis_number',\n", | |
| " '_to_dict_of_blocks',\n", | |
| " '_try_aggregate_string_function',\n", | |
| " '_typ',\n", | |
| " '_unpickle_frame_compat',\n", | |
| " '_unpickle_matrix_compat',\n", | |
| " '_update_inplace',\n", | |
| " '_validate_dtype',\n", | |
| " '_values',\n", | |
| " '_where',\n", | |
| " '_xs',\n", | |
| " 'abs',\n", | |
| " 'add',\n", | |
| " 'add_prefix',\n", | |
| " 'add_suffix',\n", | |
| " 'agg',\n", | |
| " 'aggregate',\n", | |
| " 'align',\n", | |
| " 'all',\n", | |
| " 'any',\n", | |
| " 'append',\n", | |
| " 'apply',\n", | |
| " 'applymap',\n", | |
| " 'as_blocks',\n", | |
| " 'as_matrix',\n", | |
| " 'asfreq',\n", | |
| " 'asof',\n", | |
| " 'assign',\n", | |
| " 'astype',\n", | |
| " 'at',\n", | |
| " 'at_time',\n", | |
| " 'axes',\n", | |
| " 'between_time',\n", | |
| " 'bfill',\n", | |
| " 'blocks',\n", | |
| " 'bool',\n", | |
| " 'boxplot',\n", | |
| " 'clip',\n", | |
| " 'clip_lower',\n", | |
| " 'clip_upper',\n", | |
| " 'columns',\n", | |
| " 'combine',\n", | |
| " 'combine_first',\n", | |
| " 'compound',\n", | |
| " 'copy',\n", | |
| " 'corr',\n", | |
| " 'corrwith',\n", | |
| " 'count',\n", | |
| " 'cov',\n", | |
| " 'cummax',\n", | |
| " 'cummin',\n", | |
| " 'cumprod',\n", | |
| " 'cumsum',\n", | |
| " 'describe',\n", | |
| " 'diff',\n", | |
| " 'div',\n", | |
| " 'divide',\n", | |
| " 'dot',\n", | |
| " 'drop',\n", | |
| " 'drop_duplicates',\n", | |
| " 'droplevel',\n", | |
| " 'dropna',\n", | |
| " 'dtypes',\n", | |
| " 'duplicated',\n", | |
| " 'empty',\n", | |
| " 'eq',\n", | |
| " 'equals',\n", | |
| " 'eval',\n", | |
| " 'ewm',\n", | |
| " 'expanding',\n", | |
| " 'explode',\n", | |
| " 'ffill',\n", | |
| " 'fillna',\n", | |
| " 'filter',\n", | |
| " 'first',\n", | |
| " 'first_valid_index',\n", | |
| " 'floordiv',\n", | |
| " 'from_dict',\n", | |
| " 'from_items',\n", | |
| " 'from_records',\n", | |
| " 'ftypes',\n", | |
| " 'ge',\n", | |
| " 'get',\n", | |
| " 'get_dtype_counts',\n", | |
| " 'get_ftype_counts',\n", | |
| " 'get_value',\n", | |
| " 'get_values',\n", | |
| " 'groupby',\n", | |
| " 'gt',\n", | |
| " 'head',\n", | |
| " 'hist',\n", | |
| " 'iat',\n", | |
| " 'idxmax',\n", | |
| " 'idxmin',\n", | |
| " 'iloc',\n", | |
| " 'index',\n", | |
| " 'infer_objects',\n", | |
| " 'info',\n", | |
| " 'insert',\n", | |
| " 'interpolate',\n", | |
| " 'is_copy',\n", | |
| " 'isin',\n", | |
| " 'isna',\n", | |
| " 'isnull',\n", | |
| " 'items',\n", | |
| " 'iteritems',\n", | |
| " 'iterrows',\n", | |
| " 'itertuples',\n", | |
| " 'ix',\n", | |
| " 'join',\n", | |
| " 'keys',\n", | |
| " 'kurt',\n", | |
| " 'kurtosis',\n", | |
| " 'last',\n", | |
| " 'last_valid_index',\n", | |
| " 'le',\n", | |
| " 'loc',\n", | |
| " 'lookup',\n", | |
| " 'lt',\n", | |
| " 'mad',\n", | |
| " 'mask',\n", | |
| " 'max',\n", | |
| " 'mean',\n", | |
| " 'median',\n", | |
| " 'melt',\n", | |
| " 'memory_usage',\n", | |
| " 'merge',\n", | |
| " 'min',\n", | |
| " 'mod',\n", | |
| " 'mode',\n", | |
| " 'mul',\n", | |
| " 'multiply',\n", | |
| " 'ndim',\n", | |
| " 'ne',\n", | |
| " 'nlargest',\n", | |
| " 'notna',\n", | |
| " 'notnull',\n", | |
| " 'nsmallest',\n", | |
| " 'nunique',\n", | |
| " 'pct_change',\n", | |
| " 'pipe',\n", | |
| " 'pivot',\n", | |
| " 'pivot_table',\n", | |
| " 'plot',\n", | |
| " 'pop',\n", | |
| " 'pow',\n", | |
| " 'prod',\n", | |
| " 'product',\n", | |
| " 'quantile',\n", | |
| " 'query',\n", | |
| " 'radd',\n", | |
| " 'rank',\n", | |
| " 'rdiv',\n", | |
| " 'reindex',\n", | |
| " 'reindex_like',\n", | |
| " 'rename',\n", | |
| " 'rename_axis',\n", | |
| " 'reorder_levels',\n", | |
| " 'replace',\n", | |
| " 'resample',\n", | |
| " 'reset_index',\n", | |
| " 'rfloordiv',\n", | |
| " 'rmod',\n", | |
| " 'rmul',\n", | |
| " 'rolling',\n", | |
| " 'round',\n", | |
| " 'rpow',\n", | |
| " 'rsub',\n", | |
| " 'rtruediv',\n", | |
| " 'sample',\n", | |
| " 'select_dtypes',\n", | |
| " 'sem',\n", | |
| " 'set_axis',\n", | |
| " 'set_index',\n", | |
| " 'set_value',\n", | |
| " 'shape',\n", | |
| " 'shift',\n", | |
| " 'size',\n", | |
| " 'skew',\n", | |
| " 'slice_shift',\n", | |
| " 'sort_index',\n", | |
| " 'sort_values',\n", | |
| " 'sparse',\n", | |
| " 'squeeze',\n", | |
| " 'stack',\n", | |
| " 'std',\n", | |
| " 'style',\n", | |
| " 'sub',\n", | |
| " 'subtract',\n", | |
| " 'sum',\n", | |
| " 'swapaxes',\n", | |
| " 'swaplevel',\n", | |
| " 'tail',\n", | |
| " 'take',\n", | |
| " 'to_clipboard',\n", | |
| " 'to_csv',\n", | |
| " 'to_dense',\n", | |
| " 'to_dict',\n", | |
| " 'to_excel',\n", | |
| " 'to_feather',\n", | |
| " 'to_gbq',\n", | |
| " 'to_hdf',\n", | |
| " 'to_html',\n", | |
| " 'to_json',\n", | |
| " 'to_latex',\n", | |
| " 'to_msgpack',\n", | |
| " 'to_numpy',\n", | |
| " 'to_parquet',\n", | |
| " 'to_period',\n", | |
| " 'to_pickle',\n", | |
| " 'to_records',\n", | |
| " 'to_sparse',\n", | |
| " 'to_sql',\n", | |
| " 'to_stata',\n", | |
| " 'to_string',\n", | |
| " 'to_timestamp',\n", | |
| " 'to_xarray',\n", | |
| " 'transform',\n", | |
| " 'transpose',\n", | |
| " 'truediv',\n", | |
| " 'truncate',\n", | |
| " 'tshift',\n", | |
| " 'tz_convert',\n", | |
| " 'tz_localize',\n", | |
| " 'unstack',\n", | |
| " 'update',\n", | |
| " 'values',\n", | |
| " 'var',\n", | |
| " 'where',\n", | |
| " 'xs']" | |
| ] | |
| }, | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dir(pd.DataFrame)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": { | |
| "collapsed": true, | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def method_pipeline(func):\n", | |
| " def decorated(*args, **kw):\n", | |
| " return PandasPipeline(func, *args, **kw)\n", | |
| " return decorated\n", | |
| "\n", | |
| "global_dict = globals()\n", | |
| "\n", | |
| "for method in dir(pd.DataFrame):\n", | |
| " if 'a' <= method[0] <= 'z':\n", | |
| " global_dict[method] = method_pipeline(getattr(pd.DataFrame,\n", | |
| " method))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'a': {0: 2}, 'b': {0: nan}}" | |
| ] | |
| }, | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df >> replace(-1, nan) >> add_n(1) >> to_dict()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "slide" | |
| } | |
| }, | |
| "source": [ | |
| "## Summary" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": { | |
| "collapsed": true, | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "elegant_syntax = True" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "metadata": { | |
| "collapsed": true, | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "python_internals = True" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "metadata": { | |
| "slideshow": { | |
| "slide_type": "fragment" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<__main__.PandasPipeline at 0x7ff34a730e50>" | |
| ] | |
| }, | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "all([elegant_syntax, python_internals])" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "celltoolbar": "Slideshow", | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.5" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment