grahamanderson · January 25, 2018 19:10
diff --git a/Census_Percentage_Change_by_Year.ipynb b/Census_Percentage_Change_by_Year.ipynb
 {
  "cells": [
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Percentage Change for Census Variables (by Year)\n- proof of concept\n- Use to get the pct change (by year) b/w any census variable (acs1, sfl)\n- Loads external 'nationalP_places.txt' file to get PlaceFips for a city\n- Myself, I need to get Place-specific changes between variables...Like the Population Growth Mean Income. Also, I don't always know which years are going to be valid. \n- This example searches a variable for its most current year and the most vintage year\n- The Census has a septuagintillion variables...hopefully future iterations will make this easier for me :)"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import requests\nfrom bs4 import BeautifulSoup\nimport pandas as pd\nimport numpy as np\nimport us\nimport json\n\nimport sqlite3\nDB = './data/sqlite3/acs1.db'\nTABLE = 'Variables'\n\nfrom IPython.core.interactiveshell import InteractiveShell\nInteractiveShell.ast_node_interactivity = \"all\"\n\njson_keys = json.load(open('./data/api_key.json'))\nCENSUS_API_KEY = json_keys['census-api']",
      "execution_count": 207,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### Functions"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "#### Helper Functions"
    },
    {
      "metadata": {
        "code_folding": [
          0,
          7,
          18,
          33,
          56
        ],
        "trusted": true
      },
      "cell_type": "code",
      "source": "def get_asc_variable(df, search, limit=10):\n    '''Search in Variable and Label Columns \n       get_asc_variable(df2, 'Population', 10)\n    '''\n    return df[(df.label.str.contains(search,case=False)) |\n              df.index.str.contains(search, case=False)][['label', 'concept']][0:limit]\n\n\ndef geoid(row):\n    if row['PLACEFP'] != '00000':\n        return '16000US' + row['STATEFP'] + row['PLACEFP']\n\n\n#     elif row['city_fips'] != '00000':\n#         return '16000US' + row['state_fips'] + row['city_fips']\n    else:\n        return 'unknown'\n\n\ndef load_national_places():\n    df = pd.read_csv(\n        'https://www2.census.gov/geo/docs/reference/codes/files/national_places.txt',\n        sep=\"|\",\n        encoding='ISO-8859-1',\n        dtype={'STATEFP': 'str',\n               'PLACEFP': 'str',\n               'FUNCSTAT': 'str'})\n\n    df['geoid'] = df.apply(lambda row: geoid(row), axis=1)\n    df['name'] = df.PLACENAME.apply(lambda x: ' '.join(x.split(' ')[0:-1]))\n    df['type'] = df.PLACENAME.apply(lambda x: x.split(' ')[-1])\n    return df\n\n\ndef get_place_id(df, place, state):\n    '''\n    Helper Function to pct_change_var\n    get_place_id(places,'Osage', 'MN')\n    '''\n    if not state.isnumeric():\n        state_fips = us.states.lookup(state).fips\n    else:\n        state_fips = state\n\n    try:\n        place = df[(df.PLACENAME.str.contains(place, case=True))\n                   & (df.STATEFP == state_fips)\n                   & (df.TYPE.str.contains('County Subdivision') == False)][\n                       'PLACEFP'].iloc[0]\n        return str(place)\n\n    except Exception as e:\n        print(e)\n        return 'unknown'\n        pass\n\n\ndef vintage_or_current_var(r, places, vintage=False):\n    ''' Helper Function to pct_change_var\n        Find the First Valid Year with a Variable Value\n       https://api.census.gov/data/2014/acs1?get=NAME,B02015_009E,B02015_009M&for=state:*\n        https://api.census.gov/data/2014/acs1?get=NAME,B01001_001E&for=place:05000&in=state:48\n        '''\n    found = False\n\n    place_id = get_place_id(places, r['city'], r['state'])\n    state_fips = us.states.lookup(r['state']).fips\n\n    # look for the end or the start year...and should we go backwards or forwards\n    if vintage == True:\n        start = r['range'][0]\n        end = r['range'][-1]\n        step = 1\n    else:\n        start = r['range'][-1]\n        end = r['range'][0]\n        step = -1  # reverse order\n        #print(start, end, step)\n\n    # Find a Year with a Valid Value\n    for year in range(start, end, step):\n\n        res = requests.get(\n            '{0}/{1}/{2}?get={3}&for=place:{4}&in=state:{5}&key={6}'.format(\n                r['base'], year, r['ds'], r['key'], place_id, state_fips, r[\n                    'census_api_key']))\n\n        if (res.status_code != 404):\n\n            # return Variable (as key) with year and variable value (as vals)\n            try:\n                return [\n                    r['city'], r['state'], r['key'], r['meta'], year,\n                    int(res.json()[1][0])\n                ]\n                found = True\n            except Exception as e:\n                #print(e)\n                pass\n        else:\n            found = False\n\n    # Give Up: Never Found a Valid Year..Variable might not exist or URL is malformed\n    if not found: return np.nan",
      "execution_count": 233,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "#### Main Function"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Main Functon\ndef pct_change_var(r):\n    '''Main Function'''\n    vintage = vintage_or_current_var(r, places, vintage=False)\n    current = vintage_or_current_var(r, places, vintage=True)\n    vintage_yr = vintage[4]\n    vintage_val = vintage[-1]\n    current_yr = current[4]\n    current_val = current[-1]\n    pct_change = abs((current[-1] - vintage[-1])) / vintage[-1]\n\n    return [r['city'], r['state'], pct_change, r['key'], r['meta'],current_yr, current_val, vintage_yr, vintage_val]",
      "execution_count": 57,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "---\n## Loading/Cleaning"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "#### Load Place Fips Dataset (from census url)"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Load External PLACE Fips (national_places.txt)into a Dataframe...from Census Site\nplaces = load_national_places()\nplaces.sample(1)",
      "execution_count": 55,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 55,
          "data": {
            "text/plain": "      STATE STATEFP PLACEFP     PLACENAME                TYPE FUNCSTAT  \\\n23042    NY      36   02297  Antwerp town  County Subdivision        A   \n\n                 COUNTY           geoid     name  type  \n23042  Jefferson County  16000US3602297  Antwerp  town  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>STATE</th>\n      <th>STATEFP</th>\n      <th>PLACEFP</th>\n      <th>PLACENAME</th>\n      <th>TYPE</th>\n      <th>FUNCSTAT</th>\n      <th>COUNTY</th>\n      <th>geoid</th>\n      <th>name</th>\n      <th>type</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>23042</th>\n      <td>NY</td>\n      <td>36</td>\n      <td>02297</td>\n      <td>Antwerp town</td>\n      <td>County Subdivision</td>\n      <td>A</td>\n      <td>Jefferson County</td>\n      <td>16000US3602297</td>\n      <td>Antwerp</td>\n      <td>town</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "#### Load ACS1 Variable Table (from census)"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Fairly long load...might want to load the file first rather than directly in pandas :\\\njson = pd.read_json('https://api.census.gov/data/2014/acs1/variables.json')\nacs1_vars  = json.variables.apply(pd.Series) # Explode JSON into multiple columns\n\n# After the first json load...query sqlite instead...milliseconds faster ;)\n# con = sqlite3.connect(DB)\n# acs1_vars = pd.read_sql('Select * FROM {0} LIMIT {1}'.format(TABLE,2),  con)\n# con.close()",
      "execution_count": 235,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "#### Alternate Asc1 Load––using Requests (probably faster)"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Alternate Loading Works--might be a lot faster\n# resp = requests.get('https://api.census.gov/data/2014/acs1/variables.json')\n# data = resp.json()\n# df = pd.DataFrame(data)\n# acs1_vars_v2 = df.variables.apply(pd.Series) # Explode JSON into multiple columns",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "#### Write Acs1 to SQLite"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df = acs1_vars.reset_index()[['label', 'concept', 'group']]\n\n# Write the Acs1 Table\ncon = sqlite3.connect(DB)\ndf.to_sql(TABLE, con=con, index=False, if_exists='replace')\ncon.close()\n\n# Database Check\ncon = sqlite3.connect(DB)\npd.read_sql('Select * FROM {0} LIMIT {1}'.format(TABLE,2),  con)\ncon.close()",
      "execution_count": 234,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 234,
          "data": {
            "text/plain": "                                               label                 concept  \\\n0  American Indian Area/Alaska Native Area/Hawaii...  Selectable Geographies   \n1  American Indian Area/Alaska Native Area/Hawaii...  Selectable Geographies   \n\n  group  \n0   N/A  \n1   N/A  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>label</th>\n      <th>concept</th>\n      <th>group</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>American Indian Area/Alaska Native Area/Hawaii...</td>\n      <td>Selectable Geographies</td>\n      <td>N/A</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>American Indian Area/Alaska Native Area/Hawaii...</td>\n      <td>Selectable Geographies</td>\n      <td>N/A</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Find Census Variables that Apply\nthere are...like what...64k of them?! Search around."
    },
    {
      "metadata": {
        "code_folding": [
          1
        ],
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Search within label (and index) for Population\nprint('Searching for Population...')\nget_asc_variable(acs1_vars, 'Population', limit=3)\n\nprint(\"Searching for part of Variable...\")\n# Search within label (and index) for Population\nget_asc_variable(acs1_vars, 'B02003', limit =3)",
      "execution_count": 221,
      "outputs": [
        {
          "output_type": "stream",
          "text": "Searching for Population...\n",
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "execution_count": 221,
          "data": {
            "text/plain": "                                                    label  \\\nB02003_002E                       Population of one race:   \nB02003_002M  Margin of Error for!!Population of one race:   \nB02003_003E          Population of one race:!!White alone   \n\n                            concept  \nB02003_002E  B02003.  Detailed Race  \nB02003_002M  B02003.  Detailed Race  \nB02003_003E  B02003.  Detailed Race  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>label</th>\n      <th>concept</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>B02003_002E</th>\n      <td>Population of one race:</td>\n      <td>B02003.  Detailed Race</td>\n    </tr>\n    <tr>\n      <th>B02003_002M</th>\n      <td>Margin of Error for!!Population of one race:</td>\n      <td>B02003.  Detailed Race</td>\n    </tr>\n    <tr>\n      <th>B02003_003E</th>\n      <td>Population of one race:!!White alone</td>\n      <td>B02003.  Detailed Race</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "text": "Searching for part of Variable...\n",
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "execution_count": 221,
          "data": {
            "text/plain": "                                   label                 concept\nB02003_001E                       Total:  B02003.  Detailed Race\nB02003_001M  Margin of Error for!!Total:  B02003.  Detailed Race\nB02003_002E      Population of one race:  B02003.  Detailed Race",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>label</th>\n      <th>concept</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>B02003_001E</th>\n      <td>Total:</td>\n      <td>B02003.  Detailed Race</td>\n    </tr>\n    <tr>\n      <th>B02003_001M</th>\n      <td>Margin of Error for!!Total:</td>\n      <td>B02003.  Detailed Race</td>\n    </tr>\n    <tr>\n      <th>B02003_002E</th>\n      <td>Population of one race:</td>\n      <td>B02003.  Detailed Race</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "---\n## Analysis\nproof of concept.. <br>My intention is to use this function in an pandas apply function---over a dataframe."
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "#### Get Percentage Change in Total Population  for Austin, TX \nIn future, should work for any variable in the acs1, sf1, or acs5<br/>\nacs1 variables on https://api.census.gov/data/2014/acs1/variables.html"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# First and Last Valid Year Value for the Census Variable (Variable, Year, Value)\nparams = pd.Series({\n    'city': 'Austin',\n    'state': 'TX',\n    'key': 'B01003_001E',\n    'meta': 'Population Percentage Change', #Next rev:Get from asc1_vars\n    'range': [2010, 2018],\n    'ds': 'acs1',\n    'base': 'https://api.census.gov/data',\n    'census_api_key': CENSUS_API_KEY\n})\nresult = pct_change_var(params)\nresult",
      "execution_count": 222,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 222,
          "data": {
            "text/plain": "['Austin',\n 'TX',\n 0.09577287946428571,\n 'B01003_001E',\n 'Population Percentage Change',\n 2012,\n 842595,\n 2015,\n 931840]"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "print('For {0}, {1}, variable {2} ({3}) from years {4}-{5} is {6:.1%}'.format(\n    result[0],  # Austin\n    result[1],  # Texas\n    result[3],  # B01001_001E \n    result[4],  # Population Percentage Change (Description)\n    result[5],  # 2012 (Most Vintage Year)\n    result[7],  # 2015 (Most Curent Year)\n    result[2])) # Pop Percentage Change Between Years",
      "execution_count": 223,
      "outputs": [
        {
          "output_type": "stream",
          "text": "For Austin, TX, variable B01003_001E (Population Percentage Change) from years 2012-2015 is 9.6%\n",
          "name": "stdout"
        }
      ]
    }
  ],
  "metadata": {
    "_draft": {
      "nbviewer_url": "https://gist.github.com/986d806e3b53e12b81f4b086034a369d"
    },
    "gist": {
      "id": "986d806e3b53e12b81f4b086034a369d",
      "data": {
        "description": "Census API––Percentage Change by Year––Proof of Concept",
        "public": true
      }
    },
    "kernelspec": {
      "name": "conda-env-jupyter-py",
      "display_name": "Python [conda env:jupyter]",
      "language": "python"
    },
    "language_info": {
      "name": "python",
      "version": "3.6.3",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "## Percentage Change for Census Variables (by Year)\n- proof of concept\n- Use to get the pct change (by year) b/w any census variable (acs1, sfl)\n- Loads external 'nationalP_places.txt' file to get PlaceFips for a city\n- Myself, I need to get Place-specific changes between variables...Like the Population Growth Mean Income. Also, I don't always know which years are going to be valid. \n- This example searches a variable for its most current year and the most vintage year\n- The Census has a septuagintillion variables...hopefully future iterations will make this easier for me :)"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "import requests\nfrom bs4 import BeautifulSoup\nimport pandas as pd\nimport numpy as np\nimport us\nimport json\n\nimport sqlite3\nDB = './data/sqlite3/acs1.db'\nTABLE = 'Variables'\n\nfrom IPython.core.interactiveshell import InteractiveShell\nInteractiveShell.ast_node_interactivity = \"all\"\n\njson_keys = json.load(open('./data/api_key.json'))\nCENSUS_API_KEY = json_keys['census-api']",
	"execution_count": 207,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Functions"
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "#### Helper Functions"
	},
	{
	"metadata": {
	"code_folding": [
	0,
	7,
	18,
	33,
	56
	],
	"trusted": true
	},
	"cell_type": "code",
	"source": "def get_asc_variable(df, search, limit=10):\n '''Search in Variable and Label Columns \n get_asc_variable(df2, 'Population', 10)\n '''\n return df[(df.label.str.contains(search,case=False)) \|\n df.index.str.contains(search, case=False)][['label', 'concept']][0:limit]\n\n\ndef geoid(row):\n if row['PLACEFP'] != '00000':\n return '16000US' + row['STATEFP'] + row['PLACEFP']\n\n\n# elif row['city_fips'] != '00000':\n# return '16000US' + row['state_fips'] + row['city_fips']\n else:\n return 'unknown'\n\n\ndef load_national_places():\n df = pd.read_csv(\n 'https://www2.census.gov/geo/docs/reference/codes/files/national_places.txt',\n sep=\"\|\",\n encoding='ISO-8859-1',\n dtype={'STATEFP': 'str',\n 'PLACEFP': 'str',\n 'FUNCSTAT': 'str'})\n\n df['geoid'] = df.apply(lambda row: geoid(row), axis=1)\n df['name'] = df.PLACENAME.apply(lambda x: ' '.join(x.split(' ')[0:-1]))\n df['type'] = df.PLACENAME.apply(lambda x: x.split(' ')[-1])\n return df\n\n\ndef get_place_id(df, place, state):\n '''\n Helper Function to pct_change_var\n get_place_id(places,'Osage', 'MN')\n '''\n if not state.isnumeric():\n state_fips = us.states.lookup(state).fips\n else:\n state_fips = state\n\n try:\n place = df[(df.PLACENAME.str.contains(place, case=True))\n & (df.STATEFP == state_fips)\n & (df.TYPE.str.contains('County Subdivision') == False)][\n 'PLACEFP'].iloc[0]\n return str(place)\n\n except Exception as e:\n print(e)\n return 'unknown'\n pass\n\n\ndef vintage_or_current_var(r, places, vintage=False):\n ''' Helper Function to pct_change_var\n Find the First Valid Year with a Variable Value\n https://api.census.gov/data/2014/acs1?get=NAME,B02015_009E,B02015_009M&for=state:*\n https://api.census.gov/data/2014/acs1?get=NAME,B01001_001E&for=place:05000&in=state:48\n '''\n found = False\n\n place_id = get_place_id(places, r['city'], r['state'])\n state_fips = us.states.lookup(r['state']).fips\n\n # look for the end or the start year...and should we go backwards or forwards\n if vintage == True:\n start = r['range'][0]\n end = r['range'][-1]\n step = 1\n else:\n start = r['range'][-1]\n end = r['range'][0]\n step = -1 # reverse order\n #print(start, end, step)\n\n # Find a Year with a Valid Value\n for year in range(start, end, step):\n\n res = requests.get(\n '{0}/{1}/{2}?get={3}&for=place:{4}&in=state:{5}&key={6}'.format(\n r['base'], year, r['ds'], r['key'], place_id, state_fips, r[\n 'census_api_key']))\n\n if (res.status_code != 404):\n\n # return Variable (as key) with year and variable value (as vals)\n try:\n return [\n r['city'], r['state'], r['key'], r['meta'], year,\n int(res.json()[1][0])\n ]\n found = True\n except Exception as e:\n #print(e)\n pass\n else:\n found = False\n\n # Give Up: Never Found a Valid Year..Variable might not exist or URL is malformed\n if not found: return np.nan",
	"execution_count": 233,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "#### Main Function"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Main Functon\ndef pct_change_var(r):\n '''Main Function'''\n vintage = vintage_or_current_var(r, places, vintage=False)\n current = vintage_or_current_var(r, places, vintage=True)\n vintage_yr = vintage[4]\n vintage_val = vintage[-1]\n current_yr = current[4]\n current_val = current[-1]\n pct_change = abs((current[-1] - vintage[-1])) / vintage[-1]\n\n return [r['city'], r['state'], pct_change, r['key'], r['meta'],current_yr, current_val, vintage_yr, vintage_val]",
	"execution_count": 57,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "---\n## Loading/Cleaning"
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "#### Load Place Fips Dataset (from census url)"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Load External PLACE Fips (national_places.txt)into a Dataframe...from Census Site\nplaces = load_national_places()\nplaces.sample(1)",
	"execution_count": 55,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 55,
	"data": {
	"text/plain": " STATE STATEFP PLACEFP PLACENAME TYPE FUNCSTAT \\\n23042 NY 36 02297 Antwerp town County Subdivision A \n\n COUNTY geoid name type \n23042 Jefferson County 16000US3602297 Antwerp town ",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>STATE</th>\n <th>STATEFP</th>\n <th>PLACEFP</th>\n <th>PLACENAME</th>\n <th>TYPE</th>\n <th>FUNCSTAT</th>\n <th>COUNTY</th>\n <th>geoid</th>\n <th>name</th>\n <th>type</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>23042</th>\n <td>NY</td>\n <td>36</td>\n <td>02297</td>\n <td>Antwerp town</td>\n <td>County Subdivision</td>\n <td>A</td>\n <td>Jefferson County</td>\n <td>16000US3602297</td>\n <td>Antwerp</td>\n <td>town</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "#### Load ACS1 Variable Table (from census)"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Fairly long load...might want to load the file first rather than directly in pandas :\\\njson = pd.read_json('https://api.census.gov/data/2014/acs1/variables.json')\nacs1_vars = json.variables.apply(pd.Series) # Explode JSON into multiple columns\n\n# After the first json load...query sqlite instead...milliseconds faster ;)\n# con = sqlite3.connect(DB)\n# acs1_vars = pd.read_sql('Select * FROM {0} LIMIT {1}'.format(TABLE,2), con)\n# con.close()",
	"execution_count": 235,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "#### Alternate Asc1 Load––using Requests (probably faster)"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Alternate Loading Works--might be a lot faster\n# resp = requests.get('https://api.census.gov/data/2014/acs1/variables.json')\n# data = resp.json()\n# df = pd.DataFrame(data)\n# acs1_vars_v2 = df.variables.apply(pd.Series) # Explode JSON into multiple columns",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "#### Write Acs1 to SQLite"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "df = acs1_vars.reset_index()[['label', 'concept', 'group']]\n\n# Write the Acs1 Table\ncon = sqlite3.connect(DB)\ndf.to_sql(TABLE, con=con, index=False, if_exists='replace')\ncon.close()\n\n# Database Check\ncon = sqlite3.connect(DB)\npd.read_sql('Select * FROM {0} LIMIT {1}'.format(TABLE,2), con)\ncon.close()",
	"execution_count": 234,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 234,
	"data": {
	"text/plain": " label concept \\\n0 American Indian Area/Alaska Native Area/Hawaii... Selectable Geographies \n1 American Indian Area/Alaska Native Area/Hawaii... Selectable Geographies \n\n group \n0 N/A \n1 N/A ",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>label</th>\n <th>concept</th>\n <th>group</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>American Indian Area/Alaska Native Area/Hawaii...</td>\n <td>Selectable Geographies</td>\n <td>N/A</td>\n </tr>\n <tr>\n <th>1</th>\n <td>American Indian Area/Alaska Native Area/Hawaii...</td>\n <td>Selectable Geographies</td>\n <td>N/A</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "## Find Census Variables that Apply\nthere are...like what...64k of them?! Search around."
	},
	{
	"metadata": {
	"code_folding": [
	1
	],
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Search within label (and index) for Population\nprint('Searching for Population...')\nget_asc_variable(acs1_vars, 'Population', limit=3)\n\nprint(\"Searching for part of Variable...\")\n# Search within label (and index) for Population\nget_asc_variable(acs1_vars, 'B02003', limit =3)",
	"execution_count": 221,
	"outputs": [
	{
	"output_type": "stream",
	"text": "Searching for Population...\n",
	"name": "stdout"
	},
	{
	"output_type": "execute_result",
	"execution_count": 221,
	"data": {
	"text/plain": " label \\\nB02003_002E Population of one race: \nB02003_002M Margin of Error for!!Population of one race: \nB02003_003E Population of one race:!!White alone \n\n concept \nB02003_002E B02003. Detailed Race \nB02003_002M B02003. Detailed Race \nB02003_003E B02003. Detailed Race ",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>label</th>\n <th>concept</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>B02003_002E</th>\n <td>Population of one race:</td>\n <td>B02003. Detailed Race</td>\n </tr>\n <tr>\n <th>B02003_002M</th>\n <td>Margin of Error for!!Population of one race:</td>\n <td>B02003. Detailed Race</td>\n </tr>\n <tr>\n <th>B02003_003E</th>\n <td>Population of one race:!!White alone</td>\n <td>B02003. Detailed Race</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	},
	{
	"output_type": "stream",
	"text": "Searching for part of Variable...\n",
	"name": "stdout"
	},
	{
	"output_type": "execute_result",
	"execution_count": 221,
	"data": {
	"text/plain": " label concept\nB02003_001E Total: B02003. Detailed Race\nB02003_001M Margin of Error for!!Total: B02003. Detailed Race\nB02003_002E Population of one race: B02003. Detailed Race",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>label</th>\n <th>concept</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>B02003_001E</th>\n <td>Total:</td>\n <td>B02003. Detailed Race</td>\n </tr>\n <tr>\n <th>B02003_001M</th>\n <td>Margin of Error for!!Total:</td>\n <td>B02003. Detailed Race</td>\n </tr>\n <tr>\n <th>B02003_002E</th>\n <td>Population of one race:</td>\n <td>B02003. Detailed Race</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "---\n## Analysis\nproof of concept.. <br>My intention is to use this function in an pandas apply function---over a dataframe."
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "#### Get Percentage Change in Total Population for Austin, TX \nIn future, should work for any variable in the acs1, sf1, or acs5<br/>\nacs1 variables on https://api.census.gov/data/2014/acs1/variables.html"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# First and Last Valid Year Value for the Census Variable (Variable, Year, Value)\nparams = pd.Series({\n 'city': 'Austin',\n 'state': 'TX',\n 'key': 'B01003_001E',\n 'meta': 'Population Percentage Change', #Next rev:Get from asc1_vars\n 'range': [2010, 2018],\n 'ds': 'acs1',\n 'base': 'https://api.census.gov/data',\n 'census_api_key': CENSUS_API_KEY\n})\nresult = pct_change_var(params)\nresult",
	"execution_count": 222,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 222,
	"data": {
	"text/plain": "['Austin',\n 'TX',\n 0.09577287946428571,\n 'B01003_001E',\n 'Population Percentage Change',\n 2012,\n 842595,\n 2015,\n 931840]"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "print('For {0}, {1}, variable {2} ({3}) from years {4}-{5} is {6:.1%}'.format(\n result[0], # Austin\n result[1], # Texas\n result[3], # B01001_001E \n result[4], # Population Percentage Change (Description)\n result[5], # 2012 (Most Vintage Year)\n result[7], # 2015 (Most Curent Year)\n result[2])) # Pop Percentage Change Between Years",
	"execution_count": 223,
	"outputs": [
	{
	"output_type": "stream",
	"text": "For Austin, TX, variable B01003_001E (Population Percentage Change) from years 2012-2015 is 9.6%\n",
	"name": "stdout"
	}
	]
	}
	],
	"metadata": {
	"_draft": {
	"nbviewer_url": "https://gist.github.com/986d806e3b53e12b81f4b086034a369d"
	},
	"gist": {
	"id": "986d806e3b53e12b81f4b086034a369d",
	"data": {
	"description": "Census API––Percentage Change by Year––Proof of Concept",
	"public": true
	}
	},
	"kernelspec": {
	"name": "conda-env-jupyter-py",
	"display_name": "Python [conda env:jupyter]",
	"language": "python"
	},
	"language_info": {
	"name": "python",
	"version": "3.6.3",
	"mimetype": "text/x-python",
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"pygments_lexer": "ipython3",
	"nbconvert_exporter": "python",
	"file_extension": ".py"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
No results found