Skip to content

Instantly share code, notes, and snippets.

@NDari
Created May 16, 2018 15:06
Show Gist options
  • Select an option

  • Save NDari/c1fb65f0b304975d367fc58ced3fa340 to your computer and use it in GitHub Desktop.

Select an option

Save NDari/c1fb65f0b304975d367fc58ced3fa340 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
#
# Provisions a cluster node: relocates /home onto /mnt, installs a conda-based
# Python stack and, on the master node only, mounts an S3 bucket with s3fs and
# starts Jupyter Lab (through pyspark) as an upstart service.
#
# Usage: <script> [JUPYTER_PASSWORD] [NOTEBOOK_DIR]
#   JUPYTER_PASSWORD  password for the Jupyter server
#                     (default: the placeholder below)
#   NOTEBOOK_DIR      s3://bucket/folder/ location that holds the notebooks
#                     (default: the placeholder below)

# Abort on the first failing command.
set -e
JUPYTER_PASSWORD=${1:-"<YOUR_PASSWORD_HERE>"}
NOTEBOOK_DIR=${2:-"<PATH_TO_S3_NOTEBOOK_BUCKET>"}
# Command tracing is switched on only after the arguments have been captured,
# so the plaintext password assignment is not echoed into the trace output.
set -x
# Snapshot the existing home directories once, before /home is relocated.
# The directory's presence marks the snapshot as already taken.
home_backup=/mnt/home_backup
if [[ ! -d "$home_backup" ]]; then
  sudo mkdir "$home_backup"
  sudo cp -a /home/* "$home_backup"
fi

# Move /home under /mnt and leave a symlink at the old path.
# Skipped when /mnt/home already exists.
relocated_home=/mnt/home
if [[ ! -d "$relocated_home" ]]; then
  sudo mv /home/ /mnt/
  sudo ln -s "$relocated_home" /home
fi
# Install the latest Python using the Miniconda installer staged on S3.
# The two placeholders are kept as quoted strings: written bare, the shell
# parses `<NAME>` as an input and an output redirection rather than as an
# argument. Replace the text between the quotes with the real values.
MINICONDA_S3_PATH="<PATH_TO_MINICONDA_INSTALL_SCRIPT_ON_S3>"
MINICONDA_INSTALLER="<MINICONDA_INSTALL_SCRIPT>"
aws s3 cp "$MINICONDA_S3_PATH" .
bash "$MINICONDA_INSTALLER" -b -p "$HOME/conda"

echo "" >> "$HOME/.bashrc"
# Single quotes on purpose: $HOME and $PATH must be expanded when .bashrc is
# sourced, not frozen to the values this script happens to run with.
# shellcheck disable=SC2016
echo 'export PATH=$HOME/conda/bin:$PATH' >> "$HOME/.bashrc"
# shellcheck disable=SC1090,SC1091
source "$HOME/.bashrc"

conda config --set always_yes yes --set changeps1 no
conda config -f --add channels conda-forge
# NOTE(review): conda's built-in channel is named `defaults`; confirm that
# `default` is really the intended channel name. Left unchanged because
# switching it would alter channel priority.
conda config -f --add channels default

# Scientific / geospatial Python stack.
conda install \
  beautifulsoup4 \
  cython \
  findspark \
  fiona \
  geopandas \
  hdfs3 \
  numba \
  pandas \
  polyline \
  pyarrow \
  pyproj \
  pyshp \
  requests \
  rtree \
  scikit-learn \
  shapely \
  ujson
pip install \
  python-geohash
sudo yum install -y geos-devel

# Persist the Spark / Python environment for later shells.
echo "export LD_LIBRARY_PATH=/usr/local/lib/:${LD_LIBRARY_PATH}" >> "$HOME/.bashrc"
echo "export SPARK_HOME=/usr/lib/spark" >> "$HOME/.bashrc"
echo "export PYSPARK_PYTHON=${HOME}/conda/bin/python" >> "$HOME/.bashrc"
# Single quotes on purpose: SPARK_HOME is only defined by the line written
# above, so it must be expanded when .bashrc is sourced. Expanding it here
# (where it is not yet set by this script) would write "/bin" into PATH.
# shellcheck disable=SC2016
echo 'export PATH=$SPARK_HOME/bin:$PATH' >> "$HOME/.bashrc"
# shellcheck disable=SC1090,SC1091
source "$HOME/.bashrc"
#######################################
# Print the bucket name of an "s3://bucket/prefix/" style URI.
# Arguments: $1 - URI with "//" in front of the bucket name
# Outputs:   bucket name on stdout
# Returns:   1 (message on stderr) when $1 contains no "//"
#######################################
s3_uri_bucket() {
  local rest
  if [[ "$1" != *//* ]]; then
    printf 'not an s3:// style URI: %s\n' "$1" >&2
    return 1
  fi
  rest=${1#*//}
  printf '%s\n' "${rest%%/*}"
}

#######################################
# Print the key prefix of an "s3://bucket/prefix/" style URI, without leading
# or trailing slash. Prints an empty line when the URI names only a bucket.
# Arguments: $1 - URI with "//" in front of the bucket name
# Outputs:   key prefix on stdout
# Returns:   1 (message on stderr) when $1 contains no "//"
#######################################
s3_uri_folder() {
  local rest
  if [[ "$1" != *//* ]]; then
    printf 'not an s3:// style URI: %s\n' "$1" >&2
    return 1
  fi
  rest=${1#*//}
  rest=${rest%/}
  if [[ "$rest" == */* ]]; then
    printf '%s\n' "${rest#*/}"
  else
    printf '\n'
  fi
}

# Everything below runs only on the node whose instance.json reports
# isMaster as true.
if grep isMaster /mnt/var/lib/info/instance.json | grep true; then
  ### install dependencies for s3fs-fuse to access and store notebooks
  sudo yum install -y git
  sudo yum install -y libcurl libcurl-devel graphviz cyrus-sasl cyrus-sasl-devel readline readline-devel gnuplot
  sudo yum install -y automake fuse fuse-devel libxml2-devel

  # Extract BUCKET and FOLDER to mount from NOTEBOOK_DIR. Done with shell
  # parameter expansion so the value is never interpolated into Python code.
  NOTEBOOK_DIR="${NOTEBOOK_DIR%/}/"
  BUCKET=$(s3_uri_bucket "$NOTEBOOK_DIR")
  FOLDER=$(s3_uri_folder "$NOTEBOOK_DIR")
  echo "bucket '$BUCKET' folder '$FOLDER'"

  # Build and install s3fs-fuse from source.
  cd /mnt
  git clone https://github.com/s3fs-fuse/s3fs-fuse.git
  cd s3fs-fuse/
  ls -alrt
  ./autogen.sh
  ./configure
  make
  sudo make install

  # allow_other (used below) requires user_allow_other in fuse.conf.
  sudo su -c 'echo user_allow_other >> /etc/fuse.conf'
  mkdir -p /mnt/s3fs-cache
  mkdir -p "/mnt/$BUCKET"
  # NOTE(review): no_check_certificate disables TLS certificate verification
  # for the S3 endpoint; confirm this is really wanted.
  /usr/local/bin/s3fs \
    -o allow_other \
    -o iam_role=auto \
    -o umask=0 \
    -o url=https://s3.amazonaws.com \
    -o no_check_certificate \
    -o enable_noobj_cache \
    -o use_cache=/mnt/s3fs-cache "$BUCKET" "/mnt/$BUCKET"

  conda install jupyterlab matplotlib plotly bokeh datashader
  # conda install -c conda-forge jupyter_contrib_nbextensions
  # conda install -c conda-forge jupyter_nbextensions_configurator

  mkdir -p ~/.jupyter
  touch ~/.jupyter/jupyter_notebook_config.py

  # Hash the password with tracing suspended and hand it to Python through the
  # environment: the plaintext then never reaches the xtrace output, and quotes
  # or backslashes in the password cannot break (or inject into) the Python
  # snippet.
  had_xtrace=0
  if [[ $- == *x* ]]; then
    had_xtrace=1
  fi
  { set +x; } 2>/dev/null
  HASHED_PASSWORD=$(
    JUPYTER_PASSWORD="$JUPYTER_PASSWORD" python -c \
      'import os; from notebook.auth import passwd; print(passwd(os.environ["JUPYTER_PASSWORD"]))'
  )
  if ((had_xtrace)); then
    set -x
  fi

  {
    echo "c.NotebookApp.password = u'$HASHED_PASSWORD'"
    echo "c.NotebookApp.open_browser = False"
    echo "c.NotebookApp.ip = '*'"
    echo "c.NotebookApp.notebook_dir = '/mnt/$BUCKET/$FOLDER'"
    echo "c.ContentsManager.checkpoints_kwargs = {'root_dir': '.checkpoints'}"
    echo "c.NotebookApp.port = 8889"
  } >> ~/.jupyter/jupyter_notebook_config.py

  ### Setup Jupyter daemon and launch it
  cd ~
  echo "Creating Jupyter Daemon"
  # The job file is written as the current user and then moved into /etc/init
  # with sudo. The heredoc is unquoted so $BUCKET and $FOLDER are expanded now;
  # its body and terminator must stay at column 0.
  cat <<EOF > /home/hadoop/jupyter.conf
description "Jupyter"
start on runlevel [2345]
stop on runlevel [016]
respawn
respawn limit 0 10
chdir /mnt/$BUCKET/$FOLDER
script
sudo su - hadoop > /var/log/jupyter.log 2>&1 <<BASH_SCRIPT
export JAVA_HOME="/etc/alternatives/jre"
PYSPARK_DRIVER_PYTHON="/home/hadoop/conda/bin/jupyter" PYSPARK_DRIVER_PYTHON_OPTS="lab --log-level=WARN" pyspark
BASH_SCRIPT
end script
EOF
  sudo mv /home/hadoop/jupyter.conf /etc/init/
  sudo chown root:root /etc/init/jupyter.conf
  sudo initctl reload-configuration

  # start jupyter daemon
  echo "Starting Jupyter Daemon"
  sudo initctl start jupyter
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment