Last active
August 5, 2020 02:07
-
-
Save gautamborgohain/e5b37fafc5f4c8559381689413431b40 to your computer and use it in GitHub Desktop.
pers/playground/notebooks/writing_code_python_way.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "metadata": { | |
| "toc": true | |
| }, | |
| "cell_type": "markdown", | |
| "source": "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Python\" data-toc-modified-id=\"Python-1\"><span class=\"toc-item-num\">1 </span>Python</a></span><ul class=\"toc-item\"><li><span><a href=\"#Writing-code-the-python-way\" data-toc-modified-id=\"Writing-code-the-python-way-1.1\"><span class=\"toc-item-num\">1.1 </span>Writing code the python way</a></span><ul class=\"toc-item\"><li><span><a href=\"#List,-Dict-comprehensions-and-generator-expressions\" data-toc-modified-id=\"List,-Dict-comprehensions-and-generator-expressions-1.1.1\"><span class=\"toc-item-num\">1.1.1 </span>List, Dict comprehensions and generator expressions</a></span><ul class=\"toc-item\"><li><span><a href=\"#Generators\" data-toc-modified-id=\"Generators-1.1.1.1\"><span class=\"toc-item-num\">1.1.1.1 </span>Generators</a></span></li></ul></li><li><span><a href=\"#Looping-Backwards\" data-toc-modified-id=\"Looping-Backwards-1.1.2\"><span class=\"toc-item-num\">1.1.2 </span>Looping Backwards</a></span></li><li><span><a href=\"#Partials\" data-toc-modified-id=\"Partials-1.1.3\"><span class=\"toc-item-num\">1.1.3 </span>Partials</a></span></li><li><span><a href=\"#For-else\" data-toc-modified-id=\"For-else-1.1.4\"><span class=\"toc-item-num\">1.1.4 </span>For else</a></span></li><li><span><a href=\"#Defaultdict\" data-toc-modified-id=\"Defaultdict-1.1.5\"><span class=\"toc-item-num\">1.1.5 </span>Defaultdict</a></span></li><li><span><a href=\"#Dict---popitem()\" data-toc-modified-id=\"Dict---popitem()-1.1.6\"><span class=\"toc-item-num\">1.1.6 </span>Dict - popitem()</a></span></li><li><span><a href=\"#ChainMap\" data-toc-modified-id=\"ChainMap-1.1.7\"><span class=\"toc-item-num\">1.1.7 </span>ChainMap</a></span></li><li><span><a href=\"#Decorators\" data-toc-modified-id=\"Decorators-1.1.8\"><span class=\"toc-item-num\">1.1.8 </span>Decorators</a></span><ul class=\"toc-item\"><li><span><a href=\"#Timer\" 
data-toc-modified-id=\"Timer-1.1.8.1\"><span class=\"toc-item-num\">1.1.8.1 </span>Timer</a></span></li><li><span><a href=\"#Debug\" data-toc-modified-id=\"Debug-1.1.8.2\"><span class=\"toc-item-num\">1.1.8.2 </span>Debug</a></span></li><li><span><a href=\"#Repeat\" data-toc-modified-id=\"Repeat-1.1.8.3\"><span class=\"toc-item-num\">1.1.8.3 </span>Repeat</a></span></li><li><span><a href=\"#Singleton\" data-toc-modified-id=\"Singleton-1.1.8.4\"><span class=\"toc-item-num\">1.1.8.4 </span>Singleton</a></span></li></ul></li><li><span><a href=\"#Deque\" data-toc-modified-id=\"Deque-1.1.9\"><span class=\"toc-item-num\">1.1.9 </span>Deque</a></span></li><li><span><a href=\"#Context-Managers\" data-toc-modified-id=\"Context-Managers-1.1.10\"><span class=\"toc-item-num\">1.1.10 </span>Context Managers</a></span><ul class=\"toc-item\"><li><span><a href=\"#Using-contextlib-decorator\" data-toc-modified-id=\"Using-contextlib-decorator-1.1.10.1\"><span class=\"toc-item-num\">1.1.10.1 </span>Using contextlib decorator</a></span></li></ul></li></ul></li></ul></li></ul></div>" | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "# Python" | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "## Writing code the python way" | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### List, Dict comprehensions and generator expressions" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "l = [i*2 for i in range(10)]\nd = {i:i*2 for i in range(10)}\nge = (i*2 for i in range(10))", | |
| "execution_count": 1, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "#### Generators" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T01:32:22.379664Z", | |
| "start_time": "2020-08-04T01:32:22.374233Z" | |
| }, | |
| "scrolled": false, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "from pathlib import Path\n\n# Say you want to process files in a directory, but there are many files\n\n# A directory with many files\na_dir = Path(\"/Users/gautamborgohain/pers/covid19/data/COVID-19/csse_covid_19_data/csse_covid_19_daily_reports\")\nlen(list(a_dir.rglob(\"*\"))) # 61\n\ndef process_files(fname):\n \"\"\"\n A function that does some important work\n \"\"\"\n a = 5+10\n return a", | |
| "execution_count": 28, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T01:32:22.644584Z", | |
| "start_time": "2020-08-04T01:32:22.641340Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "files_list = a_dir.rglob(\"*\") # Returns a generator", | |
| "execution_count": 29, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T01:32:29.225834Z", | |
| "start_time": "2020-08-04T01:32:23.466630Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%%timeit\n\nresults = {'a':0}\nfor f_name in files_list:\n a = process_files(f_name)\n results['a'] = results['a'] + a\n ", | |
| "execution_count": 30, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "70.6 ns ± 0.605 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T01:32:50.485628Z", | |
| "start_time": "2020-08-04T01:32:50.478665Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "files_list = list(a_dir.rglob(\"*\")) # Convert the generator to a list", | |
| "execution_count": 31, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T01:32:57.852715Z", | |
| "start_time": "2020-08-04T01:32:51.535893Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%%timeit \n\nresults = {'a':0}\nfor f_name in files_list:\n a = process_files(f_name)\n results['a'] = results['a'] + a", | |
| "execution_count": 32, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "7.68 µs ± 53.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
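| "source": "Using a generator looks much faster - nanosecs vs microsecs when iterating through a list 🤔\n\nNote: this comparison is misleading. A generator can only be consumed once, so after the first `%%timeit` loop `files_list` is exhausted and every later loop iterates over nothing; the nanosecond figure measures an (almost) empty loop, not faster iteration." | |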
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### Looping Backwards" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "persons = [\"first\", \"second\", \"third\", \"zero\"]\nfor person in reversed(persons):\n\tprint(person)\n\nprint(persons) # `reversed()` *returns* the reversed list\n\n# however, to do in-place, used reverse\n\npersons.reverse() # returns None, reverses inplace\nfor person in persons:\n\tprint(person)", | |
| "execution_count": 2, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "zero\nthird\nsecond\nfirst\n['first', 'second', 'third', 'zero']\nzero\nthird\nsecond\nfirst\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### Partials" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "from functools import partial\n\ndef match(person1, person2):\n\treturn f\"hello {person1} and {person2}\"\n\nmatch_gautam = partial(match, \"gautam\")\n\nmatch_gautam(\"new_person\")", | |
| "execution_count": 30, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": "'hello gautam and new_person'" | |
| }, | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "help(iter)", | |
| "execution_count": 10, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "Help on built-in function iter in module builtins:\n\niter(...)\n iter(iterable) -> iterator\n iter(callable, sentinel) -> iterator\n \n Get an iterator from an object. In the first form, the argument must\n supply its own iterator, or be a sequence.\n In the second form, the callable is called until it returns the sentinel.\n\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "Function to loop through random selections of a list and print that selection, and stop when a sentinel value is reached" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "import random\n\nvalues = ['a', 'b', 'c', 'sentinel', 'd']\n\n#iter needs a callable that it can iterate over and an optional sentinel value\n#since this case is a random picking of value from a list, this lambda function is just there to be made repeated calls to\n\nfor meeting in iter(lambda : random.sample(values, 1).pop() , 'sentinel'): \n print(meeting)\n", | |
| "execution_count": 53, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "a\nd\nb\na\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### For else" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "#Return x back if found in values \n#Here an else block is used to write for case when the for-loop finshes without the break being executed\n\nvalues = ['a', 'b', 'c', 'sentinel', 'd']\n\ndef find(x, values):\n for v in values:\n if v == x:\n break\n else:\n return -1\n return v\n\nfind(\"d\", values)\n", | |
| "execution_count": 57, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": "'d'" | |
| }, | |
| "execution_count": 57, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "find(\"gautam\", values)", | |
| "execution_count": 55, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": "-1" | |
| }, | |
| "execution_count": 55, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### Defaultdict" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "from collections import defaultdict\n\nd = defaultdict(int)\nfor v in values:\n d[v] += 1\n\nprint(d) ", | |
| "execution_count": 58, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "defaultdict(<class 'int'>, {'a': 1, 'b': 1, 'c': 1, 'sentinel': 1, 'd': 1})\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "d = defaultdict(list)\nfor v in values:\n d[v].append(\"val\")\n\nprint(d) ", | |
| "execution_count": 59, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "defaultdict(<class 'list'>, {'a': ['val'], 'b': ['val'], 'c': ['val'], 'sentinel': ['val'], 'd': ['val']})\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### Dict - popitem()\n\npopitem removes and returns an item (the most recently inserted one on Python 3.7+, an arbitrary one on older versions) and is atomic in CPython, so you can use threads to pull out elements without putting locks around it" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "while d:\n k, v = d.popitem()\n print(k,v)", | |
| "execution_count": 60, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "d ['val']\nsentinel ['val']\nc ['val']\nb ['val']\na ['val']\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### ChainMap\n\nTo chain env vars" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:40:11.781023Z", | |
| "start_time": "2020-08-04T02:40:11.777095Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "from collections import ChainMap\n\np1_dict = {'a':10, 'b':10}\np2_dict = {'a':5, 'b':5, 'c':5}\np3_dict = {'a':2, 'b':2, 'c':2, 'd':2}\n\nfinal_dict = ChainMap(p1_dict, p2_dict, p3_dict)", | |
| "execution_count": 82, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:40:11.938164Z", | |
| "start_time": "2020-08-04T02:40:11.934594Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "final_dict # contains all the dicts added", | |
| "execution_count": 83, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": "ChainMap({'a': 10, 'b': 10}, {'a': 5, 'b': 5, 'c': 5}, {'a': 2, 'b': 2, 'c': 2, 'd': 2})" | |
| }, | |
| "execution_count": 83, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:40:13.620287Z", | |
| "start_time": "2020-08-04T02:40:13.615348Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "final_dict['a'], final_dict['c'], final_dict['d'] # it will cascade lookup across the dicts", | |
| "execution_count": 84, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": "(10, 5, 2)" | |
| }, | |
| "execution_count": 84, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:41:18.427323Z", | |
| "start_time": "2020-08-04T02:41:18.423832Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "list(final_dict.keys())", | |
| "execution_count": 88, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": "['a', 'b', 'c', 'd']" | |
| }, | |
| "execution_count": 88, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "To add another dict to the final_dict:" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:42:50.450329Z", | |
| "start_time": "2020-08-04T02:42:50.445198Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "p4_dict = {'a':2, 'b':2, 'c':2, 'd':2, 'e': 1}\nfinal_dict = final_dict.new_child(p4_dict)\nlist(final_dict.keys())", | |
| "execution_count": 91, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": "['a', 'b', 'c', 'd', 'e']" | |
| }, | |
| "execution_count": 91, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### Decorators" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-03T06:25:44.285989Z", | |
| "start_time": "2020-08-03T06:25:44.280771Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "import functools\nimport time\n\ndef cache(func):\n \"\"\"Keep a cache of previous function calls\"\"\"\n @functools.wraps(func)\n def wrapper_cache(*args, **kwargs):\n cache_key = args + tuple(kwargs.items())\n if cache_key not in wrapper_cache.cache:\n wrapper_cache.cache[cache_key] = func(*args, **kwargs)\n return wrapper_cache.cache[cache_key]\n wrapper_cache.cache = dict()\n return wrapper_cache\n\n", | |
| "execution_count": 27, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-03T06:26:21.604524Z", | |
| "start_time": "2020-08-03T06:26:11.290971Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "def func(x):\n time.sleep(0.1)\n return x**100\n\n%timeit -n 10 -r 10 _ = func(10)", | |
| "execution_count": 29, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "103 ms ± 457 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-03T06:26:28.062197Z", | |
| "start_time": "2020-08-03T06:26:27.952486Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "## Same function but decorated with `cache` to cache the results of the function given an input\n@cache\ndef func(x):\n time.sleep(0.1)\n return x**100\n\n%timeit -n 10 -r 10 _ = func(10)", | |
| "execution_count": 30, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "The slowest run took 20429.10 times longer than the fastest. This could mean that an intermediate result is being cached.\n1.04 ms ± 3.13 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-03T06:22:05.358951Z", | |
| "start_time": "2020-08-03T06:22:05.355061Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%%time\n\n@cache\ndef func(x):\n return x**100\n\n_ = func(10)\n_ = func(100)\n#Again\n_ = func(10)\n_ = func(100)", | |
| "execution_count": 10, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "CPU times: user 23 µs, sys: 0 ns, total: 23 µs\nWall time: 26.2 µs\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "#### Timer" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-03T06:51:40.995219Z", | |
| "start_time": "2020-08-03T06:51:40.888582Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "\nimport functools\nimport time\n\ndef timer(func):\n \"\"\"Print the runtime of the decorated function\"\"\"\n @functools.wraps(func)\n def wrapper_timer(*args, **kwargs):\n start_time = time.perf_counter() # 1\n value = func(*args, **kwargs)\n end_time = time.perf_counter() # 2\n run_time = end_time - start_time # 3\n print(f\"Finished {func.__name__!r} in {run_time:.4f} secs\")\n return value\n return wrapper_timer\n\n@timer\ndef func(x):\n time.sleep(0.1)\n return x**100\n\n_ = func(10)", | |
| "execution_count": 34, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "Finished 'func' in 0.1007 secs\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "#### Debug" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-03T06:52:10.720358Z", | |
| "start_time": "2020-08-03T06:52:10.609588Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "def debug(func):\n \"\"\"Print the function signature and return value\"\"\"\n @functools.wraps(func)\n def wrapper_debug(*args, **kwargs):\n args_repr = [repr(a) for a in args] # 1\n kwargs_repr = [f\"{k}={v!r}\" for k, v in kwargs.items()] # 2\n signature = \", \".join(args_repr + kwargs_repr) # 3\n print(f\"Calling {func.__name__}({signature})\")\n value = func(*args, **kwargs)\n print(f\"{func.__name__!r} returned {value!r}\") # 4\n return value\n return wrapper_debug\n\n@debug\ndef func(x):\n time.sleep(0.1)\n return x**100\n\n_ = func(10)", | |
| "execution_count": 35, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "Calling func(10)\n'func' returned 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "#### Repeat" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-03T06:53:37.210410Z", | |
| "start_time": "2020-08-03T06:53:36.683855Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "def repeat(_func=None, *, num_times=2):\n def decorator_repeat(func):\n @functools.wraps(func)\n def wrapper_repeat(*args, **kwargs):\n for _ in range(num_times):\n value = func(*args, **kwargs)\n return value\n return wrapper_repeat\n\n if _func is None:\n return decorator_repeat\n else:\n return decorator_repeat(_func)\n \n@repeat(num_times=5)\n@debug\ndef func(x):\n time.sleep(0.1)\n return x**100\n\n_ = func(10) ", | |
| "execution_count": 38, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "Calling func(10)\n'func' returned 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\nCalling func(10)\n'func' returned 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\nCalling func(10)\n'func' returned 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\nCalling func(10)\n'func' returned 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\nCalling func(10)\n'func' returned 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "⚠️The order of the decorators also matters" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-03T06:53:55.086623Z", | |
| "start_time": "2020-08-03T06:53:54.570784Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "@debug\n@repeat(num_times=5)\ndef func(x):\n time.sleep(0.1)\n return x**100\n\n_ = func(10) ", | |
| "execution_count": 39, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "Calling func(10)\n'func' returned 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "#### Singleton" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-03T06:56:11.870223Z", | |
| "start_time": "2020-08-03T06:56:11.863199Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "def singleton(cls):\n \"\"\"Make a class a Singleton class (only one instance)\"\"\"\n @functools.wraps(cls)\n def wrapper_singleton(*args, **kwargs):\n if not wrapper_singleton.instance:\n wrapper_singleton.instance = cls(*args, **kwargs)\n return wrapper_singleton.instance\n wrapper_singleton.instance = None\n return wrapper_singleton\n\n@singleton\nclass TheOne:\n pass\n\na = TheOne()\nb = TheOne()\nid(a), id(b)", | |
| "execution_count": 40, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": "(140389203430096, 140389203430096)" | |
| }, | |
| "execution_count": 40, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### Deque" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T01:43:15.485266Z", | |
| "start_time": "2020-08-04T01:43:15.481510Z" | |
| }, | |
| "scrolled": false, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "from collections import deque\nfrom pathlib import Path\n\n# Say you want to process files in a directory, but there are many files, \n# If files_list is very big and we want to remove elements from it as we process them\n# inorder to reduce the memory footprint\n# lets see the time of iterating through \n# these files using list vs a deque\n\n\n# A directory with not so many files\nfiles_list = [\"fname\"]*10000\nlen(files_list)\n\n\ndef process_files(fname):\n \"\"\"\n A function that does some important work\n \"\"\"\n a = 5+10\n return a\n", | |
| "execution_count": 43, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T01:46:18.173278Z", | |
| "start_time": "2020-08-04T01:46:08.049191Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%%timeit \n\nfiles_list = [\"fname\"]*10000\nlist_len = len(files_list)\n\nfor i in range(list_len):\n _ = process_files(files_list.pop())\n ", | |
| "execution_count": 53, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "1.24 ms ± 4.99 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T01:46:32.781137Z", | |
| "start_time": "2020-08-04T01:46:23.298774Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%%timeit \n\nfiles_deque = deque([\"fname\"]*10000)\ndeque_len = len(files_deque)\n\nfor i in range(deque_len):\n _ = process_files(files_deque.pop())\n ", | |
| "execution_count": 54, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "1.16 ms ± 9.37 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "Not much difference to be found here, let's use a line profiler to compare the pop operations of list vs deque" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T01:50:25.925155Z", | |
| "start_time": "2020-08-04T01:50:25.506407Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%load_ext line_profiler", | |
| "execution_count": 59, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T01:53:15.029702Z", | |
| "start_time": "2020-08-04T01:53:15.006294Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "def run_w_list():\n files_list = [\"fname\"]*10000\n list_len = len(files_list)\n\n for i in range(list_len):\n _ = process_files(files_list.pop())\n \n%lprun -f run_w_list run_w_list() ", | |
| "execution_count": 62, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": " \n``` \nTimer unit: 1e-06 s\n\nTotal time: 0.011755 s\nFile: <ipython-input-62-f6522c897d5f>\nFunction: run_w_list at line 1\n\nLine # Hits Time Per Hit % Time Line Contents\n==============================================================\n 1 def run_w_list():\n 2 1 8.0 8.0 0.1 files_list = [\"fname\"]*10000\n 3 1 1.0 1.0 0.0 list_len = len(files_list)\n 4 \n 5 10001 3813.0 0.4 32.4 for i in range(list_len):\n 6 10000 7933.0 0.8 67.5 _ = process_files(files_list.pop())\n \n \n``` " | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T01:55:15.640511Z", | |
| "start_time": "2020-08-04T01:55:15.616466Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "def run_w_deque():\n files_deque = deque([\"fname\"]*10000)\n deque_len = len(files_deque)\n\n for i in range(deque_len):\n _ = process_files(files_deque.pop())\n \n%lprun -f run_w_deque run_w_deque() ", | |
| "execution_count": 64, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": " \n``` \nTimer unit: 1e-06 s\n\nTotal time: 0.012004 s\nFile: <ipython-input-64-947014c657f4>\nFunction: run_w_deque at line 1\n \n\nLine # Hits Time Per Hit % Time Line Contents\n==============================================================\n 1 def run_w_deque():\n 2 1 81.0 81.0 0.7 files_deque = deque([\"fname\"]*10000)\n 3 1 1.0 1.0 0.0 deque_len = len(files_deque)\n 4 \n 5 10001 4042.0 0.4 33.7 for i in range(deque_len):\n 6 10000 7880.0 0.8 65.6 _ = process_files(files_deque.pop())\n \n``` " | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "Seems to be about the same... hmm. What about when accessing from the left?" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:00:22.445837Z", | |
| "start_time": "2020-08-04T02:00:16.966571Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%%timeit \n\nfiles_list = [\"fname\"]*10000\nlist_len = len(files_list)\n\nfor i in range(list_len):\n _ = process_files(files_list.pop(0))\n ", | |
| "execution_count": 69, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "6.7 ms ± 46.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:00:35.235287Z", | |
| "start_time": "2020-08-04T02:00:25.705987Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "%%timeit \n\nfiles_deque = deque([\"fname\"]*10000)\ndeque_len = len(files_deque)\n\nfor i in range(deque_len):\n _ = process_files(files_deque.popleft())", | |
| "execution_count": 70, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "1.16 ms ± 8.28 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "Ahaa! Now there is some real difference" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:01:05.248748Z", | |
| "start_time": "2020-08-04T02:01:05.215283Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "def run_w_list():\n files_list = [\"fname\"]*10000\n list_len = len(files_list)\n\n for i in range(list_len):\n _ = process_files(files_list.pop(0))\n \n%lprun -f run_w_list run_w_list() ", | |
| "execution_count": 71, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "```\nTimer unit: 1e-06 s\n\nTotal time: 0.022028 s\nFile: <ipython-input-71-f95514ec341d>\nFunction: run_w_list at line 1\n\nLine # Hits Time Per Hit % Time Line Contents\n==============================================================\n 1 def run_w_list():\n 2 1 8.0 8.0 0.0 files_list = [\"fname\"]*10000\n 3 1 1.0 1.0 0.0 list_len = len(files_list)\n 4 \n 5 10001 4079.0 0.4 18.5 for i in range(list_len):\n 6 10000 17940.0 1.8 81.4 _ = process_files(files_list.pop(0))\n```" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:02:01.488077Z", | |
| "start_time": "2020-08-04T02:02:01.465656Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "def run_w_deque():\n files_deque = deque([\"fname\"]*10000)\n deque_len = len(files_deque)\n\n for i in range(deque_len):\n _ = process_files(files_deque.pop())\n \n%lprun -f run_w_deque run_w_deque() ", | |
| "execution_count": 72, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "```\nTimer unit: 1e-06 s\n\nTotal time: 0.011145 s\nFile: <ipython-input-72-947014c657f4>\nFunction: run_w_deque at line 1\n\nLine # Hits Time Per Hit % Time Line Contents\n==============================================================\n 1 def run_w_deque():\n 2 1 82.0 82.0 0.7 files_deque = deque([\"fname\"]*10000)\n 3 1 1.0 1.0 0.0 deque_len = len(files_deque)\n 4 \n 5 10001 3655.0 0.4 32.8 for i in range(deque_len):\n 6 10000 7407.0 0.7 66.5 _ = process_files(files_deque.pop())\n```" | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
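| "source": "We can see in the line profiler output that the per-hit time spent on the `pop` line is much lower with the deque, by more than 2X (0.7 vs 1.8 microseconds).\n\nNote that the list version removes items from the front with `pop(0)`, which has to shift every remaining element, whereas a deque pops from either end in constant time. So if the use case needs to remove items from the front, it is better to use a deque than a list. If the container is only ever used as a stack (push and pop on the right), a plain list with `pop()` is about as fast, as the next cell shows.\n\n[stackoverflow question](https://stackoverflow.com/questions/47493446/should-i-use-a-python-deque-or-list-as-a-stack)" | |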
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "Reversing the list and then popping from the right lets you avoid creating the deque to begin with.\n\nHowever, reversing a list is O(n), so for a big list, reversing might not be a great option.\n" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:07:20.741278Z", | |
| "start_time": "2020-08-04T02:07:20.717470Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "def run_w_list():\n files_list = [\"fname\"]*10000\n files_list.reverse()\n list_len = len(files_list)\n\n for i in range(list_len):\n _ = process_files(files_list.pop())\n \n%lprun -f run_w_list run_w_list() ", | |
| "execution_count": 74, | |
| "outputs": [] | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "```\nTimer unit: 1e-06 s\n\nTotal time: 0.012061 s\nFile: <ipython-input-74-f3a7aacc2c91>\nFunction: run_w_list at line 1\n\nLine # Hits Time Per Hit % Time Line Contents\n==============================================================\n 1 def run_w_list():\n 2 1 9.0 9.0 0.1 files_list = [\"fname\"]*10000\n 3 1 5.0 5.0 0.0 files_list.reverse()\n 4 1 1.0 1.0 0.0 list_len = len(files_list)\n 5 \n 6 10001 3911.0 0.4 32.4 for i in range(list_len):\n 7 10000 8135.0 0.8 67.4 _ = process_files(files_list.pop())\n```" | |
| }, | |
| { | |
| "metadata": {}, | |
| "cell_type": "markdown", | |
| "source": "### Context Managers" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:21:11.425124Z", | |
| "start_time": "2020-08-04T02:21:11.418317Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "class SomeConnectionOrAuthenticationManager:\n    \"\"\"Toy context manager: `connection` is 10 inside the `with` block and None outside it.\"\"\"\n\n    def __init__(self, x):\n        self.connection = None\n        self.x = x\n\n    def __enter__(self):\n        # Setup: runs on entering the `with` block; the returned object is bound by `as`.\n        print(\"Setting connection ...\")\n        self.connection = 10\n        return self\n\n    def __exit__(self, exc_type, exc_value, exc_traceback):\n        # Teardown: runs on leaving the `with` block.\n        print(\"Closing connection .\")\n        self.connection = None\n\n\nwith SomeConnectionOrAuthenticationManager(x=29) as m:\n    print(f\"The connection value is {m.connection}\")", | |
| "execution_count": 75, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "Setting connection ...\nThe connection value is 10\nClosing connection .\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-04T02:26:09.969793Z", | |
| "start_time": "2020-08-04T02:26:09.965812Z" | |
| } | |
| }, | |
| "cell_type": "markdown", | |
| "source": "#### Using contextlib decorator" | |
| }, | |
| { | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-08-05T01:31:00.805121Z", | |
| "start_time": "2020-08-05T01:31:00.800858Z" | |
| }, | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "from contextlib import contextmanager\n\n@contextmanager\ndef someConnectionOrAuthenticationManager(x):\n    \"\"\"Generator-based context manager that yields a dict holding the connection.\n\n    The code before `yield` is the setup and the `finally` clause is the\n    teardown. `x` is accepted but not used by this example.\n    \"\"\"\n    print(\"Setting connection ...\")\n    # Instead of a class object, we are using a dict to encapsulate the objects of interest\n    values = {}\n    values['connection'] = 10\n    try:\n        yield values\n    finally:\n        # An exception raised inside the `with` body is re-raised at the `yield`;\n        # without try/finally the teardown would be skipped in that case.\n        print(\"Closing connection .\")\n\n\nwith someConnectionOrAuthenticationManager(29) as m:\n    print(f\"The connection value is {m['connection']}\")", | |
| "execution_count": 12, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "Setting connection ...\nThe connection value is 10\nClosing connection .\n" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": false | |
| }, | |
| "cell_type": "code", | |
| "source": "", | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "name": "conda_env_playground", | |
| "display_name": "conda_env_playground", | |
| "language": "python" | |
| }, | |
| "language_info": { | |
| "name": "python", | |
| "version": "3.7.4", | |
| "mimetype": "text/x-python", | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "pygments_lexer": "ipython3", | |
| "nbconvert_exporter": "python", | |
| "file_extension": ".py" | |
| }, | |
| "neptune": { | |
| "notebookId": "f9c1fa11-5115-49e2-8a01-c89141b792fa" | |
| }, | |
| "toc": { | |
| "nav_menu": {}, | |
| "number_sections": true, | |
| "sideBar": true, | |
| "skip_h1_title": false, | |
| "base_numbering": 1, | |
| "title_cell": "Table of Contents", | |
| "title_sidebar": "Contents", | |
| "toc_cell": true, | |
| "toc_position": {}, | |
| "toc_section_display": true, | |
| "toc_window_display": true | |
| }, | |
| "gist": { | |
| "id": "", | |
| "data": { | |
| "description": "pers/playground/notebooks/writing_code_python_way.ipynb", | |
| "public": true | |
| } | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment