Created
December 25, 2021 21:03
-
-
Save vol1ura/0e2f8aa924753c763a2c0bb868705a8e to your computer and use it in GitHub Desktop.
clubmates_engirunners.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "name": "clubmates_engirunners.ipynb", | |
| "provenance": [], | |
| "authorship_tag": "ABX9TyOWSRcN76BxAOsrr77rG3Hz", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| }, | |
| "widgets": { | |
| "application/vnd.jupyter.widget-state+json": { | |
| "4a3d8cc726d04888a03aa610ceaced1e": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HBoxModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_view_name": "HBoxView", | |
| "_dom_classes": [], | |
| "_model_name": "HBoxModel", | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "box_style": "", | |
| "layout": "IPY_MODEL_820063756eb54fc8a39a84db865d7ea4", | |
| "_model_module": "@jupyter-widgets/controls", | |
| "children": [ | |
| "IPY_MODEL_8c8627c52d344f8cac5b209f2eaa56f9", | |
| "IPY_MODEL_294f52e6617b4f3b922bd647419e6cd2", | |
| "IPY_MODEL_4d869a25e2f54da2a28c926279e511db" | |
| ] | |
| } | |
| }, | |
| "820063756eb54fc8a39a84db865d7ea4": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| }, | |
| "8c8627c52d344f8cac5b209f2eaa56f9": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_view_name": "HTMLView", | |
| "style": "IPY_MODEL_770911812c774074a283d41798d1ff5b", | |
| "_dom_classes": [], | |
| "description": "", | |
| "_model_name": "HTMLModel", | |
| "placeholder": "", | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "value": "100%", | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "description_tooltip": null, | |
| "_model_module": "@jupyter-widgets/controls", | |
| "layout": "IPY_MODEL_be94329f0da54421a3f16b25bbc07548" | |
| } | |
| }, | |
| "294f52e6617b4f3b922bd647419e6cd2": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "FloatProgressModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_view_name": "ProgressView", | |
| "style": "IPY_MODEL_cc345771c1374fc5a18e67aad15358fb", | |
| "_dom_classes": [], | |
| "description": "", | |
| "_model_name": "FloatProgressModel", | |
| "bar_style": "success", | |
| "max": 108, | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "value": 108, | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "orientation": "horizontal", | |
| "min": 0, | |
| "description_tooltip": null, | |
| "_model_module": "@jupyter-widgets/controls", | |
| "layout": "IPY_MODEL_a2238e026f2748748920d7dcadf8f93a" | |
| } | |
| }, | |
| "4d869a25e2f54da2a28c926279e511db": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "HTMLModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_view_name": "HTMLView", | |
| "style": "IPY_MODEL_139c00373a514ceb9edbc7710431483b", | |
| "_dom_classes": [], | |
| "description": "", | |
| "_model_name": "HTMLModel", | |
| "placeholder": "", | |
| "_view_module": "@jupyter-widgets/controls", | |
| "_model_module_version": "1.5.0", | |
| "value": " 108/108 [17:16<00:00, 8.81s/it]", | |
| "_view_count": null, | |
| "_view_module_version": "1.5.0", | |
| "description_tooltip": null, | |
| "_model_module": "@jupyter-widgets/controls", | |
| "layout": "IPY_MODEL_2c502534ff18428187f87629a87bfc8d" | |
| } | |
| }, | |
| "770911812c774074a283d41798d1ff5b": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_view_name": "StyleView", | |
| "_model_name": "DescriptionStyleModel", | |
| "description_width": "", | |
| "_view_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.2.0", | |
| "_model_module": "@jupyter-widgets/controls" | |
| } | |
| }, | |
| "be94329f0da54421a3f16b25bbc07548": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| }, | |
| "cc345771c1374fc5a18e67aad15358fb": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "ProgressStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_view_name": "StyleView", | |
| "_model_name": "ProgressStyleModel", | |
| "description_width": "", | |
| "_view_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.2.0", | |
| "bar_color": null, | |
| "_model_module": "@jupyter-widgets/controls" | |
| } | |
| }, | |
| "a2238e026f2748748920d7dcadf8f93a": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| }, | |
| "139c00373a514ceb9edbc7710431483b": { | |
| "model_module": "@jupyter-widgets/controls", | |
| "model_name": "DescriptionStyleModel", | |
| "model_module_version": "1.5.0", | |
| "state": { | |
| "_view_name": "StyleView", | |
| "_model_name": "DescriptionStyleModel", | |
| "description_width": "", | |
| "_view_module": "@jupyter-widgets/base", | |
| "_model_module_version": "1.5.0", | |
| "_view_count": null, | |
| "_view_module_version": "1.2.0", | |
| "_model_module": "@jupyter-widgets/controls" | |
| } | |
| }, | |
| "2c502534ff18428187f87629a87bfc8d": { | |
| "model_module": "@jupyter-widgets/base", | |
| "model_name": "LayoutModel", | |
| "model_module_version": "1.2.0", | |
| "state": { | |
| "_view_name": "LayoutView", | |
| "grid_template_rows": null, | |
| "right": null, | |
| "justify_content": null, | |
| "_view_module": "@jupyter-widgets/base", | |
| "overflow": null, | |
| "_model_module_version": "1.2.0", | |
| "_view_count": null, | |
| "flex_flow": null, | |
| "width": null, | |
| "min_width": null, | |
| "border": null, | |
| "align_items": null, | |
| "bottom": null, | |
| "_model_module": "@jupyter-widgets/base", | |
| "top": null, | |
| "grid_column": null, | |
| "overflow_y": null, | |
| "overflow_x": null, | |
| "grid_auto_flow": null, | |
| "grid_area": null, | |
| "grid_template_columns": null, | |
| "flex": null, | |
| "_model_name": "LayoutModel", | |
| "justify_items": null, | |
| "grid_row": null, | |
| "max_height": null, | |
| "align_content": null, | |
| "visibility": null, | |
| "align_self": null, | |
| "height": null, | |
| "min_height": null, | |
| "padding": null, | |
| "grid_auto_rows": null, | |
| "grid_gap": null, | |
| "max_width": null, | |
| "order": null, | |
| "_view_module_version": "1.2.0", | |
| "grid_template_areas": null, | |
| "object_position": null, | |
| "object_fit": null, | |
| "grid_auto_columns": null, | |
| "margin": null, | |
| "display": null, | |
| "left": null | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/vol1ura/0e2f8aa924753c763a2c0bb868705a8e/clubmates.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "id": "zRg0J0r6ztxV" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import random\n", | |
| "import re\n", | |
| "import requests\n", | |
| "import time\n", | |
| "from tqdm.notebook import tqdm\n", | |
| "\n", | |
| "pd.set_option('display.max_rows', None)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "club_id = 24630 # ENGIRUNNERS" | |
| ], | |
| "metadata": { | |
| "id": "clo0v2xzzyLU" | |
| }, | |
| "execution_count": 2, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "headers = {\n", | |
| " 'Host': 'www.parkrun.ru',\n", | |
| " 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0',\n", | |
| " 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',\n", | |
| " 'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',\n", | |
| " 'Accept-Encoding': 'gzip, deflate, br',\n", | |
| " 'Connection': 'keep-alive',\n", | |
| " 'Upgrade-Insecure-Requests': '1',\n", | |
| " 'Sec-GPC': '1',\n", | |
| " 'TE': 'Trailers'\n", | |
| " }" | |
| ], | |
| "metadata": { | |
| "id": "JMntqd19z4bw" | |
| }, | |
| "execution_count": 3, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "page_all_results = requests.get('https://www.parkrun.ru/results/courserecords/', headers=headers)\n", | |
| "data = pd.read_html(page_all_results.text)[0]\n", | |
| "russian_parkruns = data[data.columns[0]]" | |
| ], | |
| "metadata": { | |
| "id": "ctIMuJuVz7T9" | |
| }, | |
| "execution_count": 4, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "dfs = []\n", | |
| "for parkrun in tqdm(russian_parkruns):\n", | |
| " time.sleep(3 + 5*random.random())\n", | |
| " parkrun_trim = re.sub(r'[\\s-]', '', parkrun)\n", | |
| " url = f'https://www.parkrun.ru/{parkrun_trim}/results/clubhistory/?clubNum={club_id}'\n", | |
| " club_results = requests.get(url, headers=headers)\n", | |
| " try:\n", | |
| " df = pd.read_html(club_results.text)[0]\n", | |
| " dfs.append(df[df.columns[0]])\n", | |
| " except:\n", | |
| " print('ОШИБКА - операция завершилась досрочно. Паркран временно заблокировал IP.')\n", | |
| " break" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 49, | |
| "referenced_widgets": [ | |
| "4a3d8cc726d04888a03aa610ceaced1e", | |
| "820063756eb54fc8a39a84db865d7ea4", | |
| "8c8627c52d344f8cac5b209f2eaa56f9", | |
| "294f52e6617b4f3b922bd647419e6cd2", | |
| "4d869a25e2f54da2a28c926279e511db", | |
| "770911812c774074a283d41798d1ff5b", | |
| "be94329f0da54421a3f16b25bbc07548", | |
| "cc345771c1374fc5a18e67aad15358fb", | |
| "a2238e026f2748748920d7dcadf8f93a", | |
| "139c00373a514ceb9edbc7710431483b", | |
| "2c502534ff18428187f87629a87bfc8d" | |
| ] | |
| }, | |
| "id": "pB86PSu40JiI", | |
| "outputId": "c8369b97-53c8-4b8b-b0aa-3a195e802fc3" | |
| }, | |
| "execution_count": 5, | |
| "outputs": [ | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "application/vnd.jupyter.widget-view+json": { | |
| "model_id": "4a3d8cc726d04888a03aa610ceaced1e", | |
| "version_minor": 0, | |
| "version_major": 2 | |
| }, | |
| "text/plain": [ | |
| " 0%| | 0/108 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| "metadata": {} | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "def last_name_first(full_name: str):\n", | |
| " names = full_name.split()\n", | |
| " last_name = names.pop(1).capitalize()\n", | |
| " names.insert(0, last_name)\n", | |
| " return ' '.join(names)" | |
| ], | |
| "metadata": { | |
| "id": "JGv4TY_5LOho" | |
| }, | |
| "execution_count": 6, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "pd.concat(dfs).drop_duplicates(keep='last').apply(last_name_first).sort_values().reset_index(drop=True).shift(1, fill_value='________Фамилия_Имя___')" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "u-ij29bnAOaq", | |
| "outputId": "1462dc7f-bf69-4293-9944-84c7dee29dd2" | |
| }, | |
| "execution_count": 7, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "0 ________Фамилия_Имя___\n", | |
| "1 Kondratenko Gleb\n", | |
| "2 Martynov Dmitry\n", | |
| "3 Smirnov Kirill\n", | |
| "4 Акинин Алексей\n", | |
| "5 Бакланов Михаил\n", | |
| "6 Ванаг Константин\n", | |
| "7 Дмитренко Наталья\n", | |
| "8 Зенов Роман\n", | |
| "9 Иванов Сергей\n", | |
| "10 Колокольников Алексей\n", | |
| "11 Комаров Алексей\n", | |
| "12 Кравцов Даниил\n", | |
| "13 Лавлинская Дарья\n", | |
| "14 Мартынова Диана\n", | |
| "15 Маяков Денис\n", | |
| "16 Нестеров Алексей\n", | |
| "17 Осипов Федор\n", | |
| "18 Петров Дмитрий\n", | |
| "19 Пьянов Артем\n", | |
| "20 Самошин Илья\n", | |
| "21 Симонов Юрий\n", | |
| "22 Сосновский Виктор\n", | |
| "23 Телепень Николай\n", | |
| "24 Титаренко Наталья\n", | |
| "25 Ходакова Людмила\n", | |
| "26 Шемякина Надежда\n", | |
| "27 Щукина Ольга\n", | |
| "Name: (Unnamed: 0_level_0, Участник), dtype: object" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "execution_count": 7 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "" | |
| ], | |
| "metadata": { | |
| "id": "526myT8CFL74" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment