Starting Vim
vim [file1] [file2] ...
| """ | |
| The schemas that Spark produces for DataFrames are typically | |
| nested, and these nested schemas are quite difficult to work with | |
| interactively. In many cases, it's possible to flatten a schema | |
| into a single level of column names. | |
| """ | |
| import typing as T | |
| import cytoolz.curried as tz |
| # A simple cheat sheet of Spark DataFrame syntax | |
| # Current for Spark 1.6.1 | |
| # import statements | |
| #from pyspark.sql import SQLContext | |
| #from pyspark.sql.types import * | |
| #from pyspark.sql.functions import * | |
| from pyspark.sql import functions as F | |
| #SparkContext available as sc, HiveContext available as sqlContext. |
| """ | |
| Convert Pandas DFs in an HDFStore to parquet files for better compatibility | |
| with Spark. | |
| Run from the command line with: | |
| spark-submit --driver-memory 4g --master 'local[*]' hdf5_to_parquet.py | |
| """ | |
| import pandas as pd |
| #!/bin/bash | |
| ##################################################### | |
| # Name: Bash CheatSheet for Mac OSX | |
| # | |
| # A brief overview of the Bash basics | |
| # | |
| # Usage: | |
| # | |
| # Author: J. Le Coupanec | |
| # Date: 2014/11/04 |
| class BSTnode(object): | |
| """ | |
| Representation of a node in a binary search tree. | |
| Has a left child, right child, and key value, and stores its subtree size. | |
| """ | |
| def __init__(self, parent, t): | |
| """Create a new leaf with key t.""" | |
| self.key = t | |
| self.parent = parent | |
| self.left = None |
| from collections import defaultdict | |
| from heapq import * | |
| def dijkstra(edges, f, t): | |
| g = defaultdict(list) | |
| for l,r,c in edges: | |
| g[l].append((c,r)) | |
| q, seen, mins = [(0,f,())], set(), {f: 0} | |
| while q: |
| import sys | |
| from pyspark.context import SparkContext | |
| from numpy import array, random as np_random | |
| from sklearn import linear_model as lm | |
| from sklearn.base import copy | |
| N = 10000 # Number of data points | |
| D = 10 # Number of dimensions | |
| ITERATIONS = 5 |