Created
September 21, 2017 21:48
-
-
Save mythmon/749921b919663c5f5931b4abac7cb73f to your computer and use it in GitHub Desktop.
uptake 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "bucket = \"telemetry-parquet\"\n", | |
| "prefix = \"main_summary/v4\"\n", | |
| "s3path = \"s3://{}/{}\".format(bucket, prefix)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "CPU times: user 28 ms, sys: 8 ms, total: 36 ms\n", | |
| "Wall time: 3min 15s\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%time df = spark.read.parquet(s3path)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "root\n", | |
| " |-- document_id: string (nullable = true)\n", | |
| " |-- client_id: string (nullable = true)\n", | |
| " |-- channel: string (nullable = true)\n", | |
| " |-- normalized_channel: string (nullable = true)\n", | |
| " |-- country: string (nullable = true)\n", | |
| " |-- city: string (nullable = true)\n", | |
| " |-- os: string (nullable = true)\n", | |
| " |-- os_version: string (nullable = true)\n", | |
| " |-- os_service_pack_major: long (nullable = true)\n", | |
| " |-- os_service_pack_minor: long (nullable = true)\n", | |
| " |-- windows_build_number: long (nullable = true)\n", | |
| " |-- windows_ubr: long (nullable = true)\n", | |
| " |-- install_year: long (nullable = true)\n", | |
| " |-- profile_creation_date: long (nullable = true)\n", | |
| " |-- subsession_start_date: string (nullable = true)\n", | |
| " |-- subsession_length: long (nullable = true)\n", | |
| " |-- distribution_id: string (nullable = true)\n", | |
| " |-- submission_date: string (nullable = true)\n", | |
| " |-- sync_configured: boolean (nullable = true)\n", | |
| " |-- sync_count_desktop: integer (nullable = true)\n", | |
| " |-- sync_count_mobile: integer (nullable = true)\n", | |
| " |-- app_build_id: string (nullable = true)\n", | |
| " |-- app_display_version: string (nullable = true)\n", | |
| " |-- app_name: string (nullable = true)\n", | |
| " |-- app_version: string (nullable = true)\n", | |
| " |-- timestamp: long (nullable = true)\n", | |
| " |-- env_build_id: string (nullable = true)\n", | |
| " |-- env_build_version: string (nullable = true)\n", | |
| " |-- env_build_arch: string (nullable = true)\n", | |
| " |-- e10s_enabled: boolean (nullable = true)\n", | |
| " |-- e10s_cohort: string (nullable = true)\n", | |
| " |-- locale: string (nullable = true)\n", | |
| " |-- active_experiment_id: string (nullable = true)\n", | |
| " |-- active_experiment_branch: string (nullable = true)\n", | |
| " |-- reason: string (nullable = true)\n", | |
| " |-- timezone_offset: integer (nullable = true)\n", | |
| " |-- plugin_hangs: integer (nullable = true)\n", | |
| " |-- aborts_plugin: integer (nullable = true)\n", | |
| " |-- aborts_content: integer (nullable = true)\n", | |
| " |-- aborts_gmplugin: integer (nullable = true)\n", | |
| " |-- crashes_detected_plugin: integer (nullable = true)\n", | |
| " |-- crashes_detected_content: integer (nullable = true)\n", | |
| " |-- crashes_detected_gmplugin: integer (nullable = true)\n", | |
| " |-- crash_submit_attempt_main: integer (nullable = true)\n", | |
| " |-- crash_submit_attempt_content: integer (nullable = true)\n", | |
| " |-- crash_submit_attempt_plugin: integer (nullable = true)\n", | |
| " |-- crash_submit_success_main: integer (nullable = true)\n", | |
| " |-- crash_submit_success_content: integer (nullable = true)\n", | |
| " |-- crash_submit_success_plugin: integer (nullable = true)\n", | |
| " |-- active_addons_count: long (nullable = true)\n", | |
| " |-- flash_version: string (nullable = true)\n", | |
| " |-- vendor: string (nullable = true)\n", | |
| " |-- is_default_browser: boolean (nullable = true)\n", | |
| " |-- default_search_engine_data_name: string (nullable = true)\n", | |
| " |-- default_search_engine: string (nullable = true)\n", | |
| " |-- loop_activity_counter: struct (nullable = true)\n", | |
| " | |-- open_panel: integer (nullable = true)\n", | |
| " | |-- open_conversation: integer (nullable = true)\n", | |
| " | |-- room_open: integer (nullable = true)\n", | |
| " | |-- room_share: integer (nullable = true)\n", | |
| " | |-- room_delete: integer (nullable = true)\n", | |
| " |-- devtools_toolbox_opened_count: integer (nullable = true)\n", | |
| " |-- client_submission_date: string (nullable = true)\n", | |
| " |-- places_bookmarks_count: integer (nullable = true)\n", | |
| " |-- places_pages_count: integer (nullable = true)\n", | |
| " |-- push_api_notification_received: integer (nullable = true)\n", | |
| " |-- web_notification_shown: integer (nullable = true)\n", | |
| " |-- popup_notification_stats: map (nullable = true)\n", | |
| " | |-- key: string\n", | |
| " | |-- value: struct (valueContainsNull = true)\n", | |
| " | | |-- offered: integer (nullable = true)\n", | |
| " | | |-- action_1: integer (nullable = true)\n", | |
| " | | |-- action_2: integer (nullable = true)\n", | |
| " | | |-- action_3: integer (nullable = true)\n", | |
| " | | |-- action_last: integer (nullable = true)\n", | |
| " | | |-- dismissal_click_elsewhere: integer (nullable = true)\n", | |
| " | | |-- dismissal_leave_page: integer (nullable = true)\n", | |
| " | | |-- dismissal_close_button: integer (nullable = true)\n", | |
| " | | |-- dismissal_not_now: integer (nullable = true)\n", | |
| " | | |-- open_submenu: integer (nullable = true)\n", | |
| " | | |-- learn_more: integer (nullable = true)\n", | |
| " | | |-- reopen_offered: integer (nullable = true)\n", | |
| " | | |-- reopen_action_1: integer (nullable = true)\n", | |
| " | | |-- reopen_action_2: integer (nullable = true)\n", | |
| " | | |-- reopen_action_3: integer (nullable = true)\n", | |
| " | | |-- reopen_action_last: integer (nullable = true)\n", | |
| " | | |-- reopen_dismissal_click_elsewhere: integer (nullable = true)\n", | |
| " | | |-- reopen_dismissal_leave_page: integer (nullable = true)\n", | |
| " | | |-- reopen_dismissal_close_button: integer (nullable = true)\n", | |
| " | | |-- reopen_dismissal_not_now: integer (nullable = true)\n", | |
| " | | |-- reopen_open_submenu: integer (nullable = true)\n", | |
| " | | |-- reopen_learn_more: integer (nullable = true)\n", | |
| " |-- search_counts: array (nullable = true)\n", | |
| " | |-- element: struct (containsNull = true)\n", | |
| " | | |-- engine: string (nullable = true)\n", | |
| " | | |-- source: string (nullable = true)\n", | |
| " | | |-- count: long (nullable = true)\n", | |
| " |-- submission_date_s3: string (nullable = true)\n", | |
| " |-- sample_id: string (nullable = true)\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "df.printSchema()" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "anaconda-cloud": {}, | |
| "kernelspec": { | |
| "display_name": "Python [default]", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.12" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 1 | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8

# Notebook export: reads the main_summary v4 Parquet data from S3 with Spark
# and prints the schema of the result. `spark` and `get_ipython` are not
# defined in this script; presumably the IPython/Spark kernel supplies them
# -- verify before running outside a notebook.

# In[1]:

# Build the S3 URI of the dataset from its bucket name and key prefix.
bucket = "telemetry-parquet"
prefix = "main_summary/v4"
s3path = "s3://{}/{}".format(bucket, prefix)

# In[2]:

# Equivalent of the notebook cell `%time df = spark.read.parquet(s3path)`:
# the `time` line magic runs the statement and reports CPU and wall time.
# run_line_magic() is used instead of the deprecated InteractiveShell.magic().
get_ipython().run_line_magic('time', 'df = spark.read.parquet(s3path)')

# In[3]:

# Print each column's name, type and nullability for the loaded data.
df.printSchema()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment