Skip to content

Instantly share code, notes, and snippets.

@rschroll
Last active February 7, 2020 22:10
Show Gist options
  • Select an option

  • Save rschroll/c51449f5554e317a62fc83dd336673d5 to your computer and use it in GitHub Desktop.

Select an option

Save rschroll/c51449f5554e317a62fc83dd336673d5 to your computer and use it in GitHub Desktop.
Pandas Pipeline
import pandas as pd
from inspect import isroutine, signature
from functools import wraps
# Names exported by ``from <module> import *``.  _load_methods() appends one
# entry per wrapped pd.DataFrame attribute when the module is imported.
__all__ = ['pipeline']
# _load_methods() stores a wrapper named ``all`` (from pd.DataFrame.all) in
# this module's globals, shadowing the builtin.  Bind the builtin explicitly
# under a private name so it stays correct on first import and on reloads.
from builtins import all as _all
def is_pandas(obj):
    """Return True if *obj* is an instance of ``pd.core.base.PandasObject``.

    This is the check used to decide whether a value may appear on the
    left-hand side of ``>>`` in a pipeline.
    """
    return isinstance(obj, pd.core.base.PandasObject)
class PandasPipeline:
    """A deferred call that runs when a pandas object is piped in with ``>>``.

    ``df >> PandasPipeline(func, *args, **kw)`` evaluates
    ``func(df, *args, **kw)``.
    """

    def __init__(self, func, *args, **kw):
        """Store the callable and the arguments to pass after the piped object.

        func  -- callable invoked with the piped object as first argument
        args  -- extra positional arguments passed after the piped object
        kw    -- keyword arguments passed to ``func``
        """
        self.func = func
        self.args = args
        self.kw = kw

    def __repr__(self):
        """Show the wrapped callable and stored arguments for debugging."""
        name = getattr(self.func, '__name__', None) or repr(self.func)
        parts = [name]
        parts.extend(repr(arg) for arg in self.args)
        parts.extend('{}={!r}'.format(key, value)
                     for key, value in self.kw.items())
        return '{}({})'.format(type(self).__name__, ', '.join(parts))

    def __rrshift__(self, df):
        """Implement ``df >> self`` when the left operand is a pandas object."""
        if is_pandas(df):
            return self.func(df, *self.args, **self.kw)
        # Any other left operand: defer to Python, which raises the usual
        # "unsupported operand type(s)" TypeError.
        return NotImplemented
def pipeline(n_df=1):
    """Decorator that lets a function be used as ``df >> func(...)``.

    Works both bare (``@pipeline``) and called (``@pipeline()`` or
    ``@pipeline(2)``).

    Calling the decorated function does one of two things:

    * If at least ``n_df`` positional arguments were given, the first
      ``n_df`` of them are all pandas objects, and the arguments bind to
      ``func``'s signature, ``func`` is called directly and its result is
      returned.
    * Otherwise a ``PandasPipeline`` holding the arguments is returned; the
      object on the left of ``>>`` is supplied later as the first argument.

    Parameters
    ----------
    n_df : int or callable, default 1
        Number of leading positional arguments that must be pandas objects
        for a direct call.  If a callable is passed instead (bare
        ``@pipeline`` usage), it is decorated directly with ``n_df`` = 1.

    Returns
    -------
    callable
        The decorated function for bare usage, otherwise the decorator.
    """
    def decorator(func):
        @wraps(func)
        def decorated(*args, **kw):
            # ``_all`` is the builtin all(); the plain name is shadowed in
            # this module once the DataFrame method wrappers are loaded.
            if len(args) >= n_df and _all(is_pandas(arg) for arg in args[:n_df]):
                try:
                    # Only test the argument binding here, so a TypeError
                    # raised inside ``func`` itself is never swallowed.
                    signature(func).bind(*args, **kw)
                except TypeError:
                    # Didn't have the right number of arguments, so fall
                    # through and build a pipeline instead.
                    pass
                else:
                    return func(*args, **kw)
            return PandasPipeline(func, *args, **kw)
        return decorated

    # callable() rather than inspect.isroutine(): partials, classes and
    # callable instances are valid targets for bare ``@pipeline`` too, and an
    # integer ``n_df`` is never callable.
    if callable(n_df):
        func = n_df
        n_df = 1  # Rebinds the closure variable that ``decorated`` reads.
        return decorator(func)
    return decorator
def _load_methods():
    """Expose every lowercase-named ``pd.DataFrame`` attribute as a pipeline step.

    For each name in ``dir(pd.DataFrame)`` whose first character is an ASCII
    lowercase letter, a wrapper of the same name is stored in this module's
    globals and the name is appended to ``__all__``.  Calling a wrapper
    returns a ``PandasPipeline``, so ``df >> replace(-1, nan)`` ends up
    calling ``pd.DataFrame.replace(df, -1, nan)``.

    Attributes that are not callable on the class (for example properties)
    are wrapped in a getter, so ``df >> shape()`` returns ``df.shape``.

    Note that wrappers named like builtins (``all``, ``any``, ``max``, ...)
    shadow those builtins inside this module.
    """
    def method_pipeline(name, attr):
        if callable(attr):
            func = attr
        else:
            # A property or similar descriptor: calling the descriptor object
            # itself would raise TypeError, so read the attribute off the
            # piped object instead.
            def func(df):
                return getattr(df, name)
            func.__name__ = name

        @wraps(attr)
        def decorated(*args, **kw):
            return PandasPipeline(func, *args, **kw)
        return decorated

    global_dict = globals()
    for method in dir(pd.DataFrame):
        # Skip private/dunder names and capitalised attributes such as ``T``.
        if 'a' <= method[0] <= 'z':
            global_dict[method] = method_pipeline(
                method, getattr(pd.DataFrame, method))
            __all__.append(method)
# Run at import time so the DataFrame wrappers exist as module-level names.
_load_methods()
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from numpy import nan\n",
"\n",
"def is_pandas(obj):\n",
" return isinstance(obj, pd.core.base.PandasObject)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"# Pandas Pipeline"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"- Elegant syntax for data analysis"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"- Doing naughty things to Python internals"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"```python\n",
"df.replace(-1, nan).to_dict()\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"```python\n",
"df.replace(-1, nan).my_func().to_dict()\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"```python\n",
"my_func(df.replace(-1, nan)).to_dict()\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"```python\n",
"df.replace(-1, nan).pipe(my_func).to_dict()\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"R has pipelines\n",
"\n",
"```r\n",
"df %>% replace(-1, nan) %>% my_func %>% to_dict()\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Python has the *right-shift* operator"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"4 >> 1"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"And Pandas doesn't use it"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "unsupported operand type(s) for >>: 'DataFrame' and 'int'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-3-d687ee8f1bd0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'b'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m>>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for >>: 'DataFrame' and 'int'"
]
}
],
"source": [
"df = pd.DataFrame([{'a': 1, 'b': -1}])\n",
"df >> 1"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Let's use this: `df >> my_func`"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true,
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [],
"source": [
"class PandasPipeline():\n",
" def __init__(self, func):\n",
" self.func = func\n",
" \n",
" def __rrshift__(self, df):\n",
" if is_pandas(df):\n",
" return self.func(df)\n",
" return NotImplemented"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2 0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def my_func(df):\n",
" return df + 1\n",
"\n",
"my_func_pipe = PandasPipeline(my_func)\n",
"\n",
"df >> my_func_pipe"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"**Fun Python Fact:** *Anything* can be a decorator, if it's callable"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2 0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"@PandasPipeline\n",
"def my_func(df):\n",
" return df + 1\n",
"\n",
"df >> my_func"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Some people like functions with more than one argument"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"*Fine...*"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true,
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [],
"source": [
"def pipeline(func):\n",
" def decorated(*args, **kw):\n",
" return PandasPipeline(func, *args, **kw)\n",
" return decorated\n",
"\n",
"class PandasPipeline():\n",
" def __init__(self, func, *args, **kw):\n",
" self.func = func\n",
" self.args = args\n",
" self.kw = kw\n",
" \n",
" def __rrshift__(self, df):\n",
" if is_pandas(df):\n",
" return self.func(df, *self.args, **self.kw)\n",
" return NotImplemented"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2 0"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"@pipeline\n",
"def add_n(df, n):\n",
" return df + n\n",
"\n",
"df >> add_n(1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"But you know someone will do this:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<__main__.PandasPipeline at 0x7ff34a7c13d0>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"add_n(df, 1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"*Fine...*"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true,
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [],
"source": [
"def pipeline(n_df=1):\n",
" def decorator(func):\n",
" def decorated(*args, **kw):\n",
" if (len(args) >= n_df and \n",
" all(is_pandas(arg) for arg in args[:n_df])):\n",
" return func(*args, **kw)\n",
" return PandasPipeline(func, *args, **kw)\n",
" return decorated\n",
" return decorator"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true,
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"@pipeline()\n",
"def add_n(df, n):\n",
" return df + n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2 0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df >> add_n(1)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2 0"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"add_n(df, 1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"But this works..."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>-2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2 -2"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"add_n(df, df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"...while this doesn't"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "add_n() missing 1 required positional argument: 'n'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-15-c1586a3c0af2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m>>\u001b[0m \u001b[0madd_n\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m<ipython-input-10-abd9dd81dad0>\u001b[0m in \u001b[0;36mdecorated\u001b[0;34m(*args, **kw)\u001b[0m\n\u001b[1;32m 4\u001b[0m if (len(args) >= n_df and \n\u001b[1;32m 5\u001b[0m all(is_pandas(arg) for arg in args[:n_df])):\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mPandasPipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdecorated\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: add_n() missing 1 required positional argument: 'n'"
]
}
],
"source": [
"df >> add_n(df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"*Fine...*"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"We could catch the `TypeError`, but that could be raised by many other things. We just want to test the binding of arguments."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<BoundArguments (df= a b\n",
"0 1 -1, n= a b\n",
"0 1 -1)>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from inspect import signature\n",
"\n",
"def add_n(df, n):\n",
" return df + n\n",
"\n",
"signature(add_n).bind(df, df)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "missing a required argument: 'n'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-17-05a6cc18d925>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msignature\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0madd_n\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/miniconda2/envs/data3/lib/python3.7/inspect.py\u001b[0m in \u001b[0;36mbind\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 3013\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mpassed\u001b[0m \u001b[0marguments\u001b[0m \u001b[0mcan\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mbe\u001b[0m \u001b[0mbound\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3014\u001b[0m \"\"\"\n\u001b[0;32m-> 3015\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_bind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3016\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3017\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mbind_partial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda2/envs/data3/lib/python3.7/inspect.py\u001b[0m in \u001b[0;36m_bind\u001b[0;34m(self, args, kwargs, partial)\u001b[0m\n\u001b[1;32m 2928\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'missing a required argument: {arg!r}'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2929\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparam\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2930\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2931\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2932\u001b[0m \u001b[0;31m# We have a positional argument to process\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: missing a required argument: 'n'"
]
}
],
"source": [
"signature(add_n).bind(df)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true,
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"def pipeline(n_df=1):\n",
" def decorator(func):\n",
" def decorated(*args, **kw):\n",
" if (len(args) >= n_df and \n",
" all(is_pandas(arg) for arg in args[:n_df])):\n",
" try:\n",
" signature(func).bind(*args, **kw)\n",
" except TypeError:\n",
" pass\n",
" else:\n",
" return func(*args, **kw)\n",
" return PandasPipeline(func, *args, **kw)\n",
" return decorated\n",
" return decorator"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"@pipeline()\n",
"def add_n(df, n):\n",
" return df + n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>-2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2 -2"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df >> add_n(df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"But you know someone's going to do this:"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "unsupported operand type(s) for >>: 'DataFrame' and 'function'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-21-ebed7a696ae3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m>>\u001b[0m \u001b[0madd_n\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for >>: 'DataFrame' and 'function'"
]
}
],
"source": [
"@pipeline\n",
"def add_n(df, n):\n",
" return df + n\n",
"\n",
"df >> add_n(1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"I did this while writing *this presentation*!"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"*Fine...*"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true,
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [],
"source": [
"from inspect import isroutine\n",
"\n",
"def pipeline(n_df=1):\n",
" def decorator(func):\n",
" def decorated(*args, **kw):\n",
" if (len(args) >= n_df and \n",
" all(is_pandas(arg) for arg in args[:n_df])):\n",
" try:\n",
" signature(func).bind(*args, **kw)\n",
" except TypeError:\n",
" pass\n",
" else:\n",
" return func(*args, **kw)\n",
" return PandasPipeline(func, *args, **kw)\n",
" return decorated\n",
" \n",
" if isroutine(n_df):\n",
" func = n_df\n",
" n_df = 1\n",
" return decorator(func)\n",
" \n",
" return decorator"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2 0"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"@pipeline\n",
"def add_n(df, n):\n",
" return df + n\n",
"\n",
"df >> add_n(1)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2 0"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"@pipeline()\n",
"def add_n(df, n):\n",
" return df + n\n",
"\n",
"df >> add_n(1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Finally, we get that beautiful syntax:\n",
"\n",
"```python\n",
"(df.replace(-1, nan) >> my_func()).to_dict()\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"No, that's even worse. I want this:\n",
"\n",
"```python\n",
"df >> replace(-1, nan) >> my_func() >> to_dict()\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"*Fine...*"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"data": {
"text/plain": [
"['T',\n",
" '_AXIS_ALIASES',\n",
" '_AXIS_IALIASES',\n",
" '_AXIS_LEN',\n",
" '_AXIS_NAMES',\n",
" '_AXIS_NUMBERS',\n",
" '_AXIS_ORDERS',\n",
" '_AXIS_REVERSED',\n",
" '__abs__',\n",
" '__add__',\n",
" '__and__',\n",
" '__array__',\n",
" '__array_priority__',\n",
" '__array_wrap__',\n",
" '__bool__',\n",
" '__class__',\n",
" '__contains__',\n",
" '__copy__',\n",
" '__deepcopy__',\n",
" '__delattr__',\n",
" '__delitem__',\n",
" '__dict__',\n",
" '__dir__',\n",
" '__div__',\n",
" '__doc__',\n",
" '__eq__',\n",
" '__finalize__',\n",
" '__floordiv__',\n",
" '__format__',\n",
" '__ge__',\n",
" '__getattr__',\n",
" '__getattribute__',\n",
" '__getitem__',\n",
" '__getstate__',\n",
" '__gt__',\n",
" '__hash__',\n",
" '__iadd__',\n",
" '__iand__',\n",
" '__ifloordiv__',\n",
" '__imod__',\n",
" '__imul__',\n",
" '__init__',\n",
" '__init_subclass__',\n",
" '__invert__',\n",
" '__ior__',\n",
" '__ipow__',\n",
" '__isub__',\n",
" '__iter__',\n",
" '__itruediv__',\n",
" '__ixor__',\n",
" '__le__',\n",
" '__len__',\n",
" '__lt__',\n",
" '__matmul__',\n",
" '__mod__',\n",
" '__module__',\n",
" '__mul__',\n",
" '__ne__',\n",
" '__neg__',\n",
" '__new__',\n",
" '__nonzero__',\n",
" '__or__',\n",
" '__pos__',\n",
" '__pow__',\n",
" '__radd__',\n",
" '__rand__',\n",
" '__rdiv__',\n",
" '__reduce__',\n",
" '__reduce_ex__',\n",
" '__repr__',\n",
" '__rfloordiv__',\n",
" '__rmatmul__',\n",
" '__rmod__',\n",
" '__rmul__',\n",
" '__ror__',\n",
" '__round__',\n",
" '__rpow__',\n",
" '__rsub__',\n",
" '__rtruediv__',\n",
" '__rxor__',\n",
" '__setattr__',\n",
" '__setitem__',\n",
" '__setstate__',\n",
" '__sizeof__',\n",
" '__str__',\n",
" '__sub__',\n",
" '__subclasshook__',\n",
" '__truediv__',\n",
" '__weakref__',\n",
" '__xor__',\n",
" '_accessors',\n",
" '_add_numeric_operations',\n",
" '_add_series_only_operations',\n",
" '_add_series_or_dataframe_operations',\n",
" '_agg_by_level',\n",
" '_agg_examples_doc',\n",
" '_agg_summary_and_see_also_doc',\n",
" '_aggregate',\n",
" '_aggregate_multiple_funcs',\n",
" '_align_frame',\n",
" '_align_series',\n",
" '_box_col_values',\n",
" '_box_item_values',\n",
" '_builtin_table',\n",
" '_check_inplace_setting',\n",
" '_check_is_chained_assignment_possible',\n",
" '_check_label_or_level_ambiguity',\n",
" '_check_percentile',\n",
" '_check_setitem_copy',\n",
" '_clear_item_cache',\n",
" '_clip_with_one_bound',\n",
" '_clip_with_scalar',\n",
" '_combine_const',\n",
" '_combine_frame',\n",
" '_combine_match_columns',\n",
" '_combine_match_index',\n",
" '_consolidate',\n",
" '_consolidate_inplace',\n",
" '_construct_axes_dict',\n",
" '_construct_axes_dict_from',\n",
" '_construct_axes_from_arguments',\n",
" '_constructor',\n",
" '_constructor_expanddim',\n",
" '_constructor_sliced',\n",
" '_convert',\n",
" '_count_level',\n",
" '_create_indexer',\n",
" '_cython_table',\n",
" '_data',\n",
" '_deprecations',\n",
" '_dir_additions',\n",
" '_dir_deletions',\n",
" '_drop_axis',\n",
" '_drop_labels_or_levels',\n",
" '_ensure_valid_index',\n",
" '_find_valid_index',\n",
" '_from_arrays',\n",
" '_from_axes',\n",
" '_get_agg_axis',\n",
" '_get_axis',\n",
" '_get_axis_name',\n",
" '_get_axis_number',\n",
" '_get_axis_resolvers',\n",
" '_get_block_manager_axis',\n",
" '_get_bool_data',\n",
" '_get_cacher',\n",
" '_get_index_resolvers',\n",
" '_get_item_cache',\n",
" '_get_label_or_level_values',\n",
" '_get_numeric_data',\n",
" '_get_space_character_free_column_resolvers',\n",
" '_get_value',\n",
" '_get_values',\n",
" '_getitem_bool_array',\n",
" '_getitem_frame',\n",
" '_getitem_multilevel',\n",
" '_gotitem',\n",
" '_iget_item_cache',\n",
" '_indexed_same',\n",
" '_info_axis',\n",
" '_info_axis_name',\n",
" '_info_axis_number',\n",
" '_info_repr',\n",
" '_init_mgr',\n",
" '_internal_get_values',\n",
" '_internal_names',\n",
" '_internal_names_set',\n",
" '_is_builtin_func',\n",
" '_is_cached',\n",
" '_is_copy',\n",
" '_is_cython_func',\n",
" '_is_datelike_mixed_type',\n",
" '_is_homogeneous_type',\n",
" '_is_label_or_level_reference',\n",
" '_is_label_reference',\n",
" '_is_level_reference',\n",
" '_is_mixed_type',\n",
" '_is_numeric_mixed_type',\n",
" '_is_view',\n",
" '_ix',\n",
" '_ixs',\n",
" '_join_compat',\n",
" '_maybe_cache_changed',\n",
" '_maybe_update_cacher',\n",
" '_metadata',\n",
" '_needs_reindex_multi',\n",
" '_obj_with_exclusions',\n",
" '_protect_consolidate',\n",
" '_reduce',\n",
" '_reindex_axes',\n",
" '_reindex_columns',\n",
" '_reindex_index',\n",
" '_reindex_multi',\n",
" '_reindex_with_indexers',\n",
" '_repr_data_resource_',\n",
" '_repr_fits_horizontal_',\n",
" '_repr_fits_vertical_',\n",
" '_repr_html_',\n",
" '_repr_latex_',\n",
" '_reset_cache',\n",
" '_reset_cacher',\n",
" '_sanitize_column',\n",
" '_selected_obj',\n",
" '_selection',\n",
" '_selection_list',\n",
" '_selection_name',\n",
" '_series',\n",
" '_set_as_cached',\n",
" '_set_axis',\n",
" '_set_axis_name',\n",
" '_set_is_copy',\n",
" '_set_item',\n",
" '_set_value',\n",
" '_setitem_array',\n",
" '_setitem_frame',\n",
" '_setitem_slice',\n",
" '_setup_axes',\n",
" '_shallow_copy',\n",
" '_slice',\n",
" '_stat_axis',\n",
" '_stat_axis_name',\n",
" '_stat_axis_number',\n",
" '_to_dict_of_blocks',\n",
" '_try_aggregate_string_function',\n",
" '_typ',\n",
" '_unpickle_frame_compat',\n",
" '_unpickle_matrix_compat',\n",
" '_update_inplace',\n",
" '_validate_dtype',\n",
" '_values',\n",
" '_where',\n",
" '_xs',\n",
" 'abs',\n",
" 'add',\n",
" 'add_prefix',\n",
" 'add_suffix',\n",
" 'agg',\n",
" 'aggregate',\n",
" 'align',\n",
" 'all',\n",
" 'any',\n",
" 'append',\n",
" 'apply',\n",
" 'applymap',\n",
" 'as_blocks',\n",
" 'as_matrix',\n",
" 'asfreq',\n",
" 'asof',\n",
" 'assign',\n",
" 'astype',\n",
" 'at',\n",
" 'at_time',\n",
" 'axes',\n",
" 'between_time',\n",
" 'bfill',\n",
" 'blocks',\n",
" 'bool',\n",
" 'boxplot',\n",
" 'clip',\n",
" 'clip_lower',\n",
" 'clip_upper',\n",
" 'columns',\n",
" 'combine',\n",
" 'combine_first',\n",
" 'compound',\n",
" 'copy',\n",
" 'corr',\n",
" 'corrwith',\n",
" 'count',\n",
" 'cov',\n",
" 'cummax',\n",
" 'cummin',\n",
" 'cumprod',\n",
" 'cumsum',\n",
" 'describe',\n",
" 'diff',\n",
" 'div',\n",
" 'divide',\n",
" 'dot',\n",
" 'drop',\n",
" 'drop_duplicates',\n",
" 'droplevel',\n",
" 'dropna',\n",
" 'dtypes',\n",
" 'duplicated',\n",
" 'empty',\n",
" 'eq',\n",
" 'equals',\n",
" 'eval',\n",
" 'ewm',\n",
" 'expanding',\n",
" 'explode',\n",
" 'ffill',\n",
" 'fillna',\n",
" 'filter',\n",
" 'first',\n",
" 'first_valid_index',\n",
" 'floordiv',\n",
" 'from_dict',\n",
" 'from_items',\n",
" 'from_records',\n",
" 'ftypes',\n",
" 'ge',\n",
" 'get',\n",
" 'get_dtype_counts',\n",
" 'get_ftype_counts',\n",
" 'get_value',\n",
" 'get_values',\n",
" 'groupby',\n",
" 'gt',\n",
" 'head',\n",
" 'hist',\n",
" 'iat',\n",
" 'idxmax',\n",
" 'idxmin',\n",
" 'iloc',\n",
" 'index',\n",
" 'infer_objects',\n",
" 'info',\n",
" 'insert',\n",
" 'interpolate',\n",
" 'is_copy',\n",
" 'isin',\n",
" 'isna',\n",
" 'isnull',\n",
" 'items',\n",
" 'iteritems',\n",
" 'iterrows',\n",
" 'itertuples',\n",
" 'ix',\n",
" 'join',\n",
" 'keys',\n",
" 'kurt',\n",
" 'kurtosis',\n",
" 'last',\n",
" 'last_valid_index',\n",
" 'le',\n",
" 'loc',\n",
" 'lookup',\n",
" 'lt',\n",
" 'mad',\n",
" 'mask',\n",
" 'max',\n",
" 'mean',\n",
" 'median',\n",
" 'melt',\n",
" 'memory_usage',\n",
" 'merge',\n",
" 'min',\n",
" 'mod',\n",
" 'mode',\n",
" 'mul',\n",
" 'multiply',\n",
" 'ndim',\n",
" 'ne',\n",
" 'nlargest',\n",
" 'notna',\n",
" 'notnull',\n",
" 'nsmallest',\n",
" 'nunique',\n",
" 'pct_change',\n",
" 'pipe',\n",
" 'pivot',\n",
" 'pivot_table',\n",
" 'plot',\n",
" 'pop',\n",
" 'pow',\n",
" 'prod',\n",
" 'product',\n",
" 'quantile',\n",
" 'query',\n",
" 'radd',\n",
" 'rank',\n",
" 'rdiv',\n",
" 'reindex',\n",
" 'reindex_like',\n",
" 'rename',\n",
" 'rename_axis',\n",
" 'reorder_levels',\n",
" 'replace',\n",
" 'resample',\n",
" 'reset_index',\n",
" 'rfloordiv',\n",
" 'rmod',\n",
" 'rmul',\n",
" 'rolling',\n",
" 'round',\n",
" 'rpow',\n",
" 'rsub',\n",
" 'rtruediv',\n",
" 'sample',\n",
" 'select_dtypes',\n",
" 'sem',\n",
" 'set_axis',\n",
" 'set_index',\n",
" 'set_value',\n",
" 'shape',\n",
" 'shift',\n",
" 'size',\n",
" 'skew',\n",
" 'slice_shift',\n",
" 'sort_index',\n",
" 'sort_values',\n",
" 'sparse',\n",
" 'squeeze',\n",
" 'stack',\n",
" 'std',\n",
" 'style',\n",
" 'sub',\n",
" 'subtract',\n",
" 'sum',\n",
" 'swapaxes',\n",
" 'swaplevel',\n",
" 'tail',\n",
" 'take',\n",
" 'to_clipboard',\n",
" 'to_csv',\n",
" 'to_dense',\n",
" 'to_dict',\n",
" 'to_excel',\n",
" 'to_feather',\n",
" 'to_gbq',\n",
" 'to_hdf',\n",
" 'to_html',\n",
" 'to_json',\n",
" 'to_latex',\n",
" 'to_msgpack',\n",
" 'to_numpy',\n",
" 'to_parquet',\n",
" 'to_period',\n",
" 'to_pickle',\n",
" 'to_records',\n",
" 'to_sparse',\n",
" 'to_sql',\n",
" 'to_stata',\n",
" 'to_string',\n",
" 'to_timestamp',\n",
" 'to_xarray',\n",
" 'transform',\n",
" 'transpose',\n",
" 'truediv',\n",
" 'truncate',\n",
" 'tshift',\n",
" 'tz_convert',\n",
" 'tz_localize',\n",
" 'unstack',\n",
" 'update',\n",
" 'values',\n",
" 'var',\n",
" 'where',\n",
" 'xs']"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dir(pd.DataFrame)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true,
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"def method_pipeline(func):\n",
" def decorated(*args, **kw):\n",
" return PandasPipeline(func, *args, **kw)\n",
" return decorated\n",
"\n",
"global_dict = globals()\n",
"\n",
"for method in dir(pd.DataFrame):\n",
" if 'a' <= method[0] <= 'z':\n",
" global_dict[method] = method_pipeline(getattr(pd.DataFrame,\n",
" method))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'a': {0: 2}, 'b': {0: nan}}"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df >> replace(-1, nan) >> add_n(1) >> to_dict()"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## Summary"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true,
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [],
"source": [
"elegant_syntax = True"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true,
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [],
"source": [
"python_internals = True"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"data": {
"text/plain": [
"<__main__.PandasPipeline at 0x7ff34a730e50>"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all([elegant_syntax, python_internals])"
]
}
],
"metadata": {
"celltoolbar": "Slideshow",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment