Created
November 2, 2016 21:54
-
-
Save donovanr/c8f17f2d337ff0303fa0242e331df505 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # install matching nvidia drivers on AWS | |
| # from https://stradajl.readthedocs.io/en/latest/tutorial/ | |
| # really should rebuild genomics container on top of nvidia-docker | |
| # ---------------------------------------------------------------- | |
| # First update the system and install build-essential: | |
| sudo apt-get update && sudo apt-get upgrade | |
| sudo apt-get install build-essential | |
| # Next, download the CUDA 7.0 installer, which bundles the NVIDIA driver | |
| wget http://developer.download.nvidia.com/compute/cuda/7_0/Prod/local_installers/cuda_7.0.28_linux.run | |
| # Extract the installers using | |
| chmod +x cuda_7.0.28_linux.run | |
| mkdir nvidia_installers | |
| ./cuda_7.0.28_linux.run -extract=`pwd`/nvidia_installers | |
| # Then update the linux image to be compatible with NVIDIA's drivers: | |
| sudo apt-get install linux-image-extra-virtual | |
| # While installing the linux-image-extra-virtual, you may be prompted "What would you like to do about menu.lst?". I selected "keep the local version currently installed". | |
| # Now we have to disable nouveau since it conflicts with NVIDIA's kernel module. Open | |
| sudo vi /etc/modprobe.d/blacklist-nouveau.conf | |
| # and add the following lines to this file: | |
| blacklist nouveau | |
| blacklist lbm-nouveau | |
| options nouveau modeset=0 | |
| alias nouveau off | |
| alias lbm-nouveau off | |
| # Back in the shell, execute the commands: | |
| echo options nouveau modeset=0 | sudo tee -a /etc/modprobe.d/nouveau-kms.conf | |
| sudo update-initramfs -u | |
| sudo reboot | |
| # After the reboot, we can finally install the driver: | |
| sudo apt-get install linux-source | |
| sudo apt-get install linux-headers-`uname -r` | |
| cd nvidia_installers | |
| sudo ./NVIDIA-Linux-x86_64-346.46.run | |
| # Just select the defaults for all the questions that pop up. | |
| # Now we can install CUDA | |
| sudo modprobe nvidia | |
| sudo ./cuda-linux64-rel-7.0.28-19326674.run | |
| sudo ./cuda-samples-linux-7.0.28-19326674.run | |
| # Follow the suggestion to add the following to your .bashrc | |
# Make the CUDA 7.0 tools and libraries discoverable (put these in ~/.bashrc).
export PATH="${PATH}:/usr/local/cuda-7.0/bin"
# Append to any existing LD_LIBRARY_PATH instead of overwriting it. The
# ${VAR:+...} form adds the ':' separator only when the variable is already
# set, so no empty entry (which the loader treats as the current directory)
# ends up in the search path.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda-7.0/lib64"
| # Now reboot | |
| sudo reboot | |
| # and log back in to check the driver version | |
| nvidia-smi | |
| # should be NVIDIA-SMI 346.46 Driver Version: 346.46 | |
| # ----------------------------- | |
| # install docker from main repo | |
| # ----------------------------- | |
| # installing docker on aws ubuntu 14.04 gpu instance: | |
| # (from https://docs.docker.com/engine/installation/linux/ubuntulinux/) | |
# Prerequisites for fetching packages from an HTTPS apt repository.
sudo apt-get update
sudo apt-get install apt-transport-https ca-certificates
# Import the repository signing key and register the ubuntu-trusty Docker repo.
sudo apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys 58118E89F3A912897C070ADBF76221572C52609D
echo "deb https://apt.dockerproject.org/repo ubuntu-trusty main" | sudo tee /etc/apt/sources.list.d/docker.list
sudo apt-get update
# Install headers and extra modules for the kernel that is actually running,
# rather than a hard-coded version, so both packages always match each other.
sudo apt-get install "linux-headers-$(uname -r)"
sudo apt-get install "linux-image-extra-$(uname -r)" linux-image-extra-virtual
sudo apt-get install docker-engine
# Add the 'ubuntu' user to the docker group.
sudo usermod -aG docker ubuntu
| # use a helper script to make the nvidia-uvm device node show up in /dev | |
| # save the following in uvm.sh and sudo bash uvm.sh to run | |
#!/bin/bash
# uvm.sh - load the NVIDIA kernel modules and create their device nodes.
#
# Usage: sudo bash uvm.sh
#
# Creates /dev/nvidia<i> for each NVIDIA 3D/VGA controller reported by lspci,
# plus /dev/nvidiactl and /dev/nvidia-uvm, all with mode 666.
# Exits 1 if a module fails to load, a node cannot be created, or the
# nvidia-uvm major number cannot be found in /proc/devices.

# Create character device node $1 with major $2 and minor $3.
# A path that already exists is left untouched so the script can be re-run.
make_node() {
  local path=$1 major=$2 minor=$3
  if [[ -e "$path" ]]; then
    return 0
  fi
  mknod -m 666 "$path" c "$major" "$minor"
}

/sbin/modprobe nvidia || exit 1

# Count the number of NVIDIA controllers found. grep -c prints 0 when
# nothing matches, in which case the loop below simply does not run.
gpu_count=$(lspci | grep -i NVIDIA \
  | grep -c -e "3D controller" -e "VGA compatible controller")

# Major 195 is used for every nvidia node; minor <i> selects the GPU and
# minor 255 is the control device.
for ((i = 0; i < gpu_count; i++)); do
  make_node "/dev/nvidia${i}" 195 "$i" || exit 1
done
make_node /dev/nvidiactl 195 255 || exit 1

/sbin/modprobe nvidia-uvm || exit 1

# Find out the major device number used by the nvidia-uvm driver.
# Match the name exactly and stop at the first hit so a similarly named
# entry can never yield a second number.
uvm_major=$(awk '$2 == "nvidia-uvm" { print $1; exit }' /proc/devices)
if [[ -z "$uvm_major" ]]; then
  printf 'uvm.sh: nvidia-uvm is not listed in /proc/devices\n' >&2
  exit 1
fi
make_node /dev/nvidia-uvm "$uvm_major" 0 || exit 1
| ___________________ | |
| # edit dockerfile and rebuild image | |
| # add the following line to the Dockerfile, right after the equivalent COPY line for main.py | |
| COPY mydense.py /scripts/ | |
| # now build the image | |
| docker build -t rdonovan/keras-genomics:hack . | |
| # push it to docker hub: | |
| docker push rdonovan/keras-genomics:hack | |
| # now run | |
| docker run --rm --device /dev/nvidiactl --device /dev/nvidia-uvm --device /dev/nvidia0 -v $(pwd)/example:/modeldir -v $(pwd)/expt1:/datadir rorydm/keras-genomics python main.py -d /datadir -c trial2 -m /modeldir/model.py -s 1001 -y -t -e | |
| # it will take a minute to initialize the model, then the screen should be full of epoch test train validation metrics. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment