Created
April 3, 2018 15:35
-
-
Save ricalanis/b663c80889746cc29cbee57e28419ded to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Reference `curl` request against the search endpoint (`Lista_Desaparecidos.asp`):\n", | |
| "\n", | |
| "```bash\n", | |
| "curl 'http://pgjesin.gob.mx:8090/desaparecidos/Lista_Desaparecidos.asp' -H 'Pragma: no-cache' -H 'Origin: http://pgjesin.gob.mx:8090' -H 'Accept-Encoding: gzip, deflate' -H 'Accept-Language: es-ES,es;q=0.9,en;q=0.8' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36' -H 'Content-Type: application/x-www-form-urlencoded' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8' -H 'Cache-Control: no-cache' -H 'Referer: http://pgjesin.gob.mx:8090/desaparecidos/Busqueda_Desaparecidos.asp?estado=8&nomestado=Chihuahua' -H 'Cookie: ASPSESSIONIDQQQTCSBT=EDHJEPHBGABFNIGJDGLJLAHP' -H 'Connection: keep-alive' --data 'Estados=8&Tipos=0&Sexos=0&Edad1=0&Edad2=100&txtpaterno=&txtmaterno=&txtNombre=&rgfotos=1&rgordenado=1&Complexion=0&Tez=0&Pelo=0&Color=0&Labios=0&Cara=0&Ojos=0&Ojoscolor=0&Boca=0&Cejas=0&Nariz=0&Frente=0&txtropa=&txtse%F1as=&Submit1=Buscar' --compressed\n", | |
| "```" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas\n", | |
| "import requests\n", | |
| "import simplejson\n", | |
| "from bs4 import BeautifulSoup\n", | |
| "from selenium import webdriver\n", | |
| "from selenium.webdriver.common.by import By\n", | |
| "from selenium.webdriver.common.keys import Keys" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "driver = webdriver.Chrome()\n", | |
| "driver.get(\"http://pgjesin.gob.mx:8090/desaparecidos/Busqueda_Desaparecidos.asp?estado=0&nomestado=Todos\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "button_start = driver.find_element_by_id(\"Submit1\")\n", | |
| "button_start.click()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Collecting PyPDF\n", | |
| " Downloading pyPdf-1.13.tar.gz\n", | |
| "Building wheels for collected packages: PyPDF\n", | |
| " Running setup.py bdist_wheel for PyPDF ... \u001b[?25ldone\n", | |
| "\u001b[?25h Stored in directory: /Users/rdalanist/Library/Caches/pip/wheels/44/8b/98/27e1f4566acef44a7fbc2b0db6a10e2a493833e2d2e34fa110\n", | |
| "Successfully built PyPDF\n", | |
| "Installing collected packages: PyPDF\n", | |
| "Successfully installed PyPDF-1.13\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Shell escape: install the PyPDF distribution from PyPI with pip3.\n", | |
| "# NOTE(review): the recorded output shows this resolved to pyPdf 1.13, and the next\n", | |
| "# cell's `import pypdf` fails with ModuleNotFoundError - confirm the intended package.\n", | |
| "!pip3 install PyPDF" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "ModuleNotFoundError", | |
| "evalue": "No module named 'pypdf'", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m-------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-34-fc75eae59478>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpypdf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pypdf'" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import pypdf" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import requests" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "1\n", | |
| "3\n", | |
| "4\n", | |
| "5\n", | |
| "6\n", | |
| "7\n", | |
| "8\n", | |
| "9\n", | |
| "10\n", | |
| "11\n", | |
| "12\n", | |
| "13\n", | |
| "14\n", | |
| "15\n", | |
| "16\n", | |
| "17\n", | |
| "18\n", | |
| "19\n", | |
| "20\n", | |
| "21\n", | |
| "22\n", | |
| "23\n", | |
| "24\n", | |
| "25\n", | |
| "26\n", | |
| "27\n", | |
| "28\n", | |
| "29\n", | |
| "30\n", | |
| "31\n", | |
| "32\n", | |
| "34\n", | |
| "36\n", | |
| "37\n", | |
| "38\n", | |
| "39\n", | |
| "40\n", | |
| "41\n", | |
| "42\n", | |
| "43\n", | |
| "44\n", | |
| "45\n", | |
| "46\n", | |
| "47\n", | |
| "48\n", | |
| "49\n", | |
| "50\n", | |
| "51\n", | |
| "52\n", | |
| "53\n", | |
| "54\n", | |
| "55\n", | |
| "56\n", | |
| "58\n", | |
| "59\n", | |
| "60\n", | |
| "61\n", | |
| "62\n", | |
| "63\n", | |
| "64\n", | |
| "65\n", | |
| "66\n", | |
| "67\n", | |
| "69\n", | |
| "70\n", | |
| "71\n", | |
| "72\n", | |
| "73\n", | |
| "74\n", | |
| "75\n", | |
| "76\n", | |
| "77\n", | |
| "78\n", | |
| "79\n", | |
| "80\n", | |
| "81\n", | |
| "82\n", | |
| "83\n", | |
| "84\n", | |
| "85\n", | |
| "86\n", | |
| "87\n", | |
| "88\n", | |
| "89\n", | |
| "90\n", | |
| "91\n", | |
| "92\n", | |
| "93\n", | |
| "94\n", | |
| "95\n", | |
| "96\n", | |
| "97\n", | |
| "98\n", | |
| "99\n", | |
| "100\n", | |
| "101\n", | |
| "102\n", | |
| "103\n", | |
| "104\n", | |
| "105\n", | |
| "106\n", | |
| "107\n", | |
| "108\n", | |
| "109\n", | |
| "110\n", | |
| "111\n", | |
| "112\n", | |
| "113\n", | |
| "114\n", | |
| "115\n", | |
| "116\n", | |
| "117\n", | |
| "118\n", | |
| "119\n", | |
| "120\n", | |
| "121\n", | |
| "122\n", | |
| "123\n", | |
| "124\n", | |
| "125\n", | |
| "126\n", | |
| "127\n", | |
| "128\n", | |
| "130\n", | |
| "131\n", | |
| "132\n", | |
| "133\n", | |
| "134\n", | |
| "135\n", | |
| "136\n", | |
| "137\n", | |
| "138\n", | |
| "139\n", | |
| "140\n", | |
| "141\n", | |
| "142\n", | |
| "143\n", | |
| "144\n", | |
| "145\n", | |
| "146\n", | |
| "147\n", | |
| "148\n", | |
| "149\n", | |
| "150\n", | |
| "151\n", | |
| "152\n", | |
| "153\n", | |
| "154\n", | |
| "155\n", | |
| "156\n", | |
| "157\n", | |
| "158\n", | |
| "159\n", | |
| "160\n", | |
| "161\n", | |
| "162\n", | |
| "163\n", | |
| "164\n", | |
| "165\n", | |
| "166\n", | |
| "167\n", | |
| "168\n", | |
| "169\n", | |
| "170\n", | |
| "171\n", | |
| "172\n", | |
| "173\n", | |
| "174\n", | |
| "175\n", | |
| "176\n", | |
| "177\n", | |
| "178\n", | |
| "179\n", | |
| "180\n", | |
| "181\n", | |
| "182\n", | |
| "183\n", | |
| "184\n", | |
| "186\n", | |
| "187\n", | |
| "188\n", | |
| "189\n", | |
| "190\n", | |
| "191\n", | |
| "192\n", | |
| "193\n", | |
| "194\n", | |
| "195\n", | |
| "196\n", | |
| "197\n", | |
| "198\n", | |
| "199\n", | |
| "200\n", | |
| "201\n", | |
| "202\n", | |
| "203\n", | |
| "204\n", | |
| "205\n", | |
| "209\n", | |
| "210\n", | |
| "211\n", | |
| "212\n", | |
| "213\n", | |
| "214\n", | |
| "215\n", | |
| "216\n", | |
| "217\n", | |
| "218\n", | |
| "219\n", | |
| "220\n", | |
| "221\n", | |
| "222\n", | |
| "223\n", | |
| "224\n", | |
| "225\n", | |
| "226\n", | |
| "227\n", | |
| "228\n", | |
| "229\n", | |
| "230\n", | |
| "231\n", | |
| "232\n", | |
| "233\n", | |
| "234\n", | |
| "235\n", | |
| "236\n", | |
| "237\n", | |
| "238\n", | |
| "239\n", | |
| "240\n", | |
| "241\n", | |
| "242\n", | |
| "243\n", | |
| "244\n", | |
| "245\n", | |
| "246\n", | |
| "247\n", | |
| "248\n", | |
| "249\n", | |
| "250\n", | |
| "251\n", | |
| "252\n", | |
| "253\n", | |
| "254\n", | |
| "255\n", | |
| "256\n", | |
| "257\n", | |
| "258\n", | |
| "259\n", | |
| "260\n", | |
| "261\n", | |
| "262\n", | |
| "263\n", | |
| "264\n", | |
| "265\n", | |
| "266\n", | |
| "267\n", | |
| "268\n", | |
| "269\n", | |
| "270\n", | |
| "272\n", | |
| "273\n", | |
| "274\n", | |
| "275\n", | |
| "276\n", | |
| "277\n", | |
| "278\n", | |
| "280\n", | |
| "281\n", | |
| "282\n", | |
| "283\n", | |
| "284\n", | |
| "285\n", | |
| "286\n", | |
| "287\n", | |
| "288\n", | |
| "289\n", | |
| "290\n", | |
| "291\n", | |
| "292\n", | |
| "293\n", | |
| "294\n", | |
| "296\n", | |
| "298\n", | |
| "299\n", | |
| "300\n", | |
| "301\n", | |
| "302\n", | |
| "303\n", | |
| "304\n", | |
| "305\n", | |
| "306\n", | |
| "307\n", | |
| "308\n", | |
| "309\n", | |
| "310\n", | |
| "311\n", | |
| "312\n", | |
| "313\n", | |
| "314\n", | |
| "315\n", | |
| "316\n", | |
| "317\n", | |
| "318\n", | |
| "319\n", | |
| "320\n", | |
| "321\n", | |
| "322\n", | |
| "323\n", | |
| "324\n", | |
| "326\n", | |
| "327\n", | |
| "328\n", | |
| "329\n", | |
| "330\n", | |
| "331\n", | |
| "332\n", | |
| "333\n", | |
| "334\n", | |
| "335\n", | |
| "336\n", | |
| "338\n", | |
| "339\n", | |
| "340\n", | |
| "341\n", | |
| "342\n", | |
| "343\n", | |
| "344\n", | |
| "345\n", | |
| "346\n", | |
| "347\n", | |
| "348\n", | |
| "349\n", | |
| "350\n", | |
| "351\n", | |
| "352\n", | |
| "353\n", | |
| "354\n", | |
| "355\n", | |
| "356\n", | |
| "357\n", | |
| "358\n", | |
| "359\n", | |
| "360\n", | |
| "361\n", | |
| "362\n", | |
| "363\n", | |
| "364\n", | |
| "365\n", | |
| "366\n", | |
| "367\n", | |
| "368\n", | |
| "369\n", | |
| "370\n", | |
| "371\n", | |
| "372\n", | |
| "373\n", | |
| "374\n", | |
| "375\n", | |
| "376\n", | |
| "377\n", | |
| "378\n", | |
| "379\n", | |
| "380\n", | |
| "381\n", | |
| "382\n", | |
| "383\n", | |
| "384\n", | |
| "385\n", | |
| "386\n", | |
| "387\n", | |
| "388\n", | |
| "389\n", | |
| "391\n", | |
| "392\n", | |
| "393\n", | |
| "394\n", | |
| "395\n", | |
| "396\n", | |
| "397\n", | |
| "398\n", | |
| "399\n", | |
| "400\n", | |
| "401\n", | |
| "402\n", | |
| "403\n", | |
| "404\n", | |
| "405\n", | |
| "406\n", | |
| "407\n", | |
| "408\n", | |
| "409\n", | |
| "410\n", | |
| "411\n", | |
| "412\n", | |
| "413\n", | |
| "414\n", | |
| "415\n", | |
| "416\n", | |
| "417\n", | |
| "418\n", | |
| "419\n", | |
| "420\n", | |
| "421\n", | |
| "422\n", | |
| "423\n", | |
| "424\n", | |
| "425\n", | |
| "426\n", | |
| "427\n", | |
| "428\n", | |
| "429\n", | |
| "430\n", | |
| "431\n", | |
| "432\n", | |
| "433\n", | |
| "434\n", | |
| "435\n", | |
| "436\n", | |
| "437\n", | |
| "438\n", | |
| "439\n", | |
| "440\n", | |
| "441\n", | |
| "442\n", | |
| "443\n", | |
| "444\n", | |
| "445\n", | |
| "447\n", | |
| "448\n", | |
| "449\n", | |
| "450\n", | |
| "451\n", | |
| "452\n", | |
| "453\n", | |
| "454\n", | |
| "455\n", | |
| "456\n", | |
| "457\n", | |
| "458\n", | |
| "459\n", | |
| "460\n", | |
| "461\n", | |
| "462\n", | |
| "463\n", | |
| "464\n", | |
| "465\n", | |
| "466\n", | |
| "467\n", | |
| "468\n", | |
| "469\n", | |
| "470\n", | |
| "471\n", | |
| "472\n", | |
| "473\n", | |
| "474\n", | |
| "475\n", | |
| "476\n", | |
| "477\n", | |
| "478\n", | |
| "479\n", | |
| "480\n", | |
| "481\n", | |
| "482\n", | |
| "483\n", | |
| "484\n", | |
| "485\n", | |
| "486\n", | |
| "487\n", | |
| "488\n", | |
| "489\n", | |
| "490\n", | |
| "491\n", | |
| "492\n", | |
| "493\n", | |
| "494\n", | |
| "495\n", | |
| "496\n", | |
| "497\n", | |
| "498\n", | |
| "499\n", | |
| "500\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "output_guanajuato = []\n", | |
| "for i in range(1,501,1):\n", | |
| " try:\n", | |
| " data =requests.get(\"https://portal.pgjguanajuato.gob.mx:8443/ProcurApp/api/PortalAMBER/\" + str(i)).json()[\"ficha\"]\n", | |
| " output_guanajuato.append(data)\n", | |
| " print(i)\n", | |
| " except:\n", | |
| " next" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "pd.DataFrame([data[0] for data in output_guanajuato]).to_csv(\"AMBER_Guanajuato.csv\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "NameError", | |
| "evalue": "name 'output' is not defined", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m-------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-9-838337db0b65>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0moutput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;31mNameError\u001b[0m: name 'output' is not defined" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "output" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.5" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment