Created
May 16, 2018 15:06
-
-
Save NDari/c1fb65f0b304975d367fc58ced3fa340 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# EMR bootstrap action: install a Miniconda-based Python stack on every
# node and, on the master only, mount an S3 notebook bucket via s3fs-fuse
# and run JupyterLab (driving pyspark) as an upstart daemon.
#
# Usage: bootstrap.sh [jupyter_password] [s3://bucket/path/to/notebooks/]
#
# -e: abort on error, -x: trace commands, pipefail: a failing stage in a
# pipeline fails the whole pipeline (the original `set -ex` let pipeline
# failures pass silently).
set -exo pipefail

# Arg 1: password for the Jupyter web UI; arg 2: s3:// URI whose contents
# are mounted as the notebook directory on the master node.
JUPYTER_PASSWORD=${1:-"<YOUR_PASSWORD_HERE>"}
NOTEBOOK_DIR=${2:-"<PATH_TO_S3_NOTEBOOK_BUCKET>"}
# Keep a pristine copy of /home before touching it (runs once per node).
[ -d /mnt/home_backup ] || {
  sudo mkdir /mnt/home_backup
  sudo cp -a /home/* /mnt/home_backup
}

# Relocate /home onto the roomier /mnt volume and leave a symlink behind
# so existing absolute paths keep working.
[ -d /mnt/home ] || {
  sudo mv /home/ /mnt/
  sudo ln -s /mnt/home /home
}
# Install the latest Python via the Miniconda installer (fetched from S3)
# into $HOME/conda and put it on PATH for this shell and future logins.
aws s3 cp <PATH_TO_MINICONDA_INSTALL_SCRIPT_ON_S3> .
bash <MINICONDA_INSTALL_SCRIPT> -b -p "$HOME/conda"
echo "" >> "$HOME/.bashrc"
# Single quotes keep $HOME/$PATH literal so every future login expands
# them itself; the original double-quoted echo froze this bootstrap
# shell's entire PATH into .bashrc at write time.
echo 'export PATH=$HOME/conda/bin:$PATH' >> "$HOME/.bashrc"
source "$HOME/.bashrc"

# Non-interactive conda: auto-confirm, no prompt mangling.  --add prepends,
# so conda-forge is registered first and the stock channel ends up with top
# priority.  NOTE: the stock channel is named "defaults" — the original
# `--add channels default` registered a nonexistent channel.
conda config --set always_yes yes --set changeps1 no
conda config -f --add channels conda-forge
conda config -f --add channels defaults
# Geospatial / data-science stack.  Everything comes from conda except
# python-geohash, which is only available via pip.
conda_pkgs=(
  beautifulsoup4
  cython
  findspark
  fiona
  geopandas
  hdfs3
  numba
  pandas
  polyline
  pyarrow
  pyproj
  pyshp
  requests
  rtree
  scikit-learn
  shapely
  ujson
)
conda install "${conda_pkgs[@]}"
pip install python-geohash
# geos-devel: native GEOS headers/libs needed by shapely and friends.
sudo yum install -y geos-devel

# Persist the Spark/Python environment for future shells.  The dollar
# signs are kept literal (single quotes) so each login expands them
# itself.  BUG FIXED: the original expanded $SPARK_HOME at write time,
# before SPARK_HOME existed in this shell (it was only echoed into
# .bashrc, not exported), so .bashrc received a bogus "/bin" PATH entry
# instead of "/usr/lib/spark/bin".
echo 'export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH' >> "$HOME/.bashrc"
echo 'export SPARK_HOME=/usr/lib/spark' >> "$HOME/.bashrc"
echo 'export PYSPARK_PYTHON=$HOME/conda/bin/python' >> "$HOME/.bashrc"
echo 'export PATH=$SPARK_HOME/bin:$PATH' >> "$HOME/.bashrc"
source "$HOME/.bashrc"
# Master-node-only setup: mount the S3 notebook bucket with s3fs-fuse and
# run JupyterLab as an upstart daemon driving pyspark.  EMR writes
# instance.json on every node; only the master has "isMaster": true.
if grep isMaster /mnt/var/lib/info/instance.json | grep true; then
  ### install dependencies for s3fs-fuse to access and store notebooks
  sudo yum install -y git
  sudo yum install -y libcurl libcurl-devel graphviz cyrus-sasl cyrus-sasl-devel readline readline-devel gnuplot
  sudo yum install -y automake fuse fuse-devel libxml2-devel

  # Normalize NOTEBOOK_DIR to exactly one trailing slash, then split the
  # s3://BUCKET/FOLDER/ URI into its bucket and folder parts.
  NOTEBOOK_DIR="${NOTEBOOK_DIR%/}/"
  BUCKET=$(python -c "print('$NOTEBOOK_DIR'.split('//')[1].split('/')[0])")
  FOLDER=$(python -c "print('/'.join('$NOTEBOOK_DIR'.split('//')[1].split('/')[1:-1]))")
  echo "bucket '$BUCKET' folder '$FOLDER'"

  # Build s3fs-fuse from source (no prebuilt package on the EMR AMI).
  cd /mnt
  git clone https://github.com/s3fs-fuse/s3fs-fuse.git
  cd s3fs-fuse/
  ls -alrt
  ./autogen.sh
  ./configure
  make
  sudo make install
  # allow_other below requires user_allow_other in fuse.conf.
  sudo su -c 'echo user_allow_other >> /etc/fuse.conf'

  # Mount the bucket at /mnt/$BUCKET, authenticating via the instance's
  # IAM role and caching objects locally on /mnt.
  mkdir -p /mnt/s3fs-cache
  mkdir -p "/mnt/$BUCKET"
  /usr/local/bin/s3fs \
    -o allow_other \
    -o iam_role=auto \
    -o umask=0 \
    -o url=https://s3.amazonaws.com \
    -o no_check_certificate \
    -o enable_noobj_cache \
    -o use_cache=/mnt/s3fs-cache "$BUCKET" "/mnt/$BUCKET"

  conda install jupyterlab matplotlib plotly bokeh datashader
  # conda install -c conda-forge jupyter_contrib_nbextensions
  # conda install -c conda-forge jupyter_nbextensions_configurator

  # Jupyter config: hashed password auth, listen on all interfaces, serve
  # the mounted S3 folder, keep checkpoints in a subdirectory.
  mkdir -p ~/.jupyter
  # BUG FIXED: the original ran `touch ls <file>`, which also created a
  # stray file named "ls" in the current directory.
  touch ~/.jupyter/jupyter_notebook_config.py
  HASHED_PASSWORD=$(python -c "from notebook.auth import passwd; print(passwd('$JUPYTER_PASSWORD'))")
  echo "c.NotebookApp.password = u'$HASHED_PASSWORD'" >> ~/.jupyter/jupyter_notebook_config.py
  echo "c.NotebookApp.open_browser = False" >> ~/.jupyter/jupyter_notebook_config.py
  echo "c.NotebookApp.ip = '*'" >> ~/.jupyter/jupyter_notebook_config.py
  echo "c.NotebookApp.notebook_dir = '/mnt/$BUCKET/$FOLDER'" >> ~/.jupyter/jupyter_notebook_config.py
  echo "c.ContentsManager.checkpoints_kwargs = {'root_dir': '.checkpoints'}" >> ~/.jupyter/jupyter_notebook_config.py
  echo "c.NotebookApp.port = 8889" >> ~/.jupyter/jupyter_notebook_config.py

  ### Set up Jupyter daemon and launch it
  cd ~
  echo "Creating Jupyter Daemon"
  # Unquoted EOF: $BUCKET/$FOLDER expand now, into the written conf; the
  # inner BASH_SCRIPT heredoc is written literally for upstart to run.
  # (dropped the useless `sudo` from `sudo cat <<EOF > file`: it elevated
  # only cat, never the redirection, and hadoop can write its own home)
  cat <<EOF > /home/hadoop/jupyter.conf
description "Jupyter"
start on runlevel [2345]
stop on runlevel [016]
respawn
respawn limit 0 10
chdir /mnt/$BUCKET/$FOLDER
script
sudo su - hadoop > /var/log/jupyter.log 2>&1 <<BASH_SCRIPT
export JAVA_HOME="/etc/alternatives/jre"
PYSPARK_DRIVER_PYTHON="/home/hadoop/conda/bin/jupyter" PYSPARK_DRIVER_PYTHON_OPTS="lab --log-level=WARN" pyspark
BASH_SCRIPT
end script
EOF
  sudo mv /home/hadoop/jupyter.conf /etc/init/
  sudo chown root:root /etc/init/jupyter.conf
  sudo initctl reload-configuration
  # start jupyter daemon
  echo "Starting Jupyter Daemon"
  sudo initctl start jupyter
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment