donovanr · November 2, 2016 21:54
diff --git a/installing_kears_genomics_on_aws_the_hard_way b/installing_kears_genomics_on_aws_the_hard_way
 # install matching nvidia drivers on AWS
 # from https://stradajl.readthedocs.io/en/latest/tutorial/
 # really should rebuild genomics container on top of nvidia-docker
 # ----------------------------------------------------------------

 # First update the system and install build-essential:

 sudo apt-get update && sudo apt-get upgrade
 sudo apt-get install build-essential

 # Next, download the NVIDIA driver

 wget http://developer.download.nvidia.com/compute/cuda/7_0/Prod/local_installers/cuda_7.0.28_linux.run

 # Extract the installers using

 chmod +x cuda_7.0.28_linux.run
 mkdir nvidia_installers
 ./cuda_7.0.28_linux.run -extract=`pwd`/nvidia_installers

 # Then update the linux image to be compatible with NVIDIA's drivers:

 sudo apt-get install linux-image-extra-virtual

 # While installing the linux-image-extra-virtual, you may be prompted "What would you like to do about menu.lst?". I selected "keep the local version currently installed".

 # Now we have to disable nouveau since it conflicts with NVIDIA's kernel module. Open

 sudo vi /etc/modprobe.d/blacklist-nouveau.conf

 # and add the following lines to this file:

 blacklist nouveau
 blacklist lbm-nouveau
 options nouveau modeset=0
 alias nouveau off
 alias lbm-nouveau off

 # Back in the shell, execute the commands:

 echo options nouveau modeset=0 | sudo tee -a /etc/modprobe.d/nouveau-kms.conf
 sudo update-initramfs -u
 sudo reboot

 # After the reboot, we can finally install the driver:

 sudo apt-get install linux-source
 sudo apt-get install linux-headers-`uname -r`

 cd nvidia_installers
 sudo ./NVIDIA-Linux-x86_64-346.46.run

 # Just select the defaults for all the questions that pop up.

 # Now we can install CUDA

 sudo modprobe nvidia
 sudo ./cuda-linux64-rel-7.0.28-19326674.run
 sudo ./cuda-samples-linux-7.0.28-19326674.run

 # Follow the suggestion to add the following to your .bashrc

 # Append the CUDA 7.0 tools and libraries to the search paths.
 # ${VAR:+...} keeps any existing LD_LIBRARY_PATH and avoids a leading ':'
 # (an empty entry, which the dynamic loader treats as the current directory).
 export PATH="${PATH}:/usr/local/cuda-7.0/bin"
 export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda-7.0/lib64"

 # Now reboot

 sudo reboot

 # and log back in to check the driver version

 nvidia-smi

 # should be NVIDIA-SMI 346.46     Driver Version: 346.46

 # -----------------------------
 # install docker from main repo
 # -----------------------------

 # installing docker on aws ubuntu 14.04 gpu instance:
 # (from https://docs.docker.com/engine/installation/linux/ubuntulinux/)

 sudo apt-get update
 sudo apt-get install apt-transport-https ca-certificates
 sudo apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys 58118E89F3A912897C070ADBF76221572C52609D
 echo deb https://apt.dockerproject.org/repo ubuntu-trusty main | sudo tee /etc/apt/sources.list.d/docker.list
 sudo apt-get update
 sudo apt-get install linux-headers-3.13.0-100-generic
 sudo apt-get install linux-image-extra-$(uname -r) linux-image-extra-virtual
 sudo apt-get install docker-engine
 sudo usermod -aG docker ubuntu

 # Use a script to make nvidia-uvm show up in /dev.
 # Save the following as uvm.sh and run it with: sudo bash uvm.sh

 #!/bin/bash
 #
 # uvm.sh - load the nvidia and nvidia-uvm kernel modules and create the
 # matching device nodes under /dev.
 #
 # Usage:   sudo bash uvm.sh
 # Exits 1 if either module fails to load or if nvidia-uvm is not listed
 # in /proc/devices; otherwise exits with the status of the last node
 # creation.

 # Create a mode-666 character device node unless one is already present,
 # so the script can be re-run without mknod "File exists" errors.
 # NOTE: an existing node is left untouched even if its numbers differ.
 #   $1 - node path, $2 - major number, $3 - minor number
 make_node() {
   [ -c "$1" ] || mknod -m 666 "$1" c "$2" "$3"
 }

 /sbin/modprobe nvidia || exit 1

 # Count the NVIDIA controllers reported by lspci (3D and VGA classes).
 nvdevs=$(lspci | grep -i NVIDIA)
 n3d=$(printf '%s\n' "$nvdevs" | grep -c "3D controller")
 nvga=$(printf '%s\n' "$nvdevs" | grep -c "VGA compatible controller")

 # One /dev/nvidiaN node per controller: major 195, minor = controller index.
 count=$(( n3d + nvga ))
 for (( i = 0; i < count; i++ )); do
   make_node "/dev/nvidia${i}" 195 "$i"
 done

 make_node /dev/nvidiactl 195 255

 /sbin/modprobe nvidia-uvm || exit 1

 # Look up the major number registered by the nvidia-uvm driver. Match the
 # device name exactly so other entries containing "nvidia-uvm" as a
 # substring cannot contribute extra numbers.
 major=$(awk '$2 == "nvidia-uvm" { print $1; exit }' /proc/devices)
 if [ -z "$major" ]; then
   echo "uvm.sh: nvidia-uvm not listed in /proc/devices" >&2
   exit 1
 fi

 make_node /dev/nvidia-uvm "$major" 0

 ___________________

 # edit dockerfile and rebuild image

 # Add the following line to the Dockerfile, right after the corresponding line for main.py

 COPY mydense.py /scripts/

 # now build the image

 docker build -t rdonovan/keras-genomics:hack .

 # push it to docker hub:

 docker push rdonovan/keras-genomics:hack

 # now run

 docker run --rm --device /dev/nvidiactl --device /dev/nvidia-uvm --device /dev/nvidia0     -v $(pwd)/example:/modeldir -v $(pwd)/expt1:/datadir rorydm/keras-genomics         python main.py -d /datadir -c trial2 -m /modeldir/model.py -s 1001 -y -t -e

 # it will take a minute to initialize the model, then the screen should be full of epoch test train validation metrics.
	# install matching nvidia drivers on AWS
	# from https://stradajl.readthedocs.io/en/latest/tutorial/
	# really should rebuild genomics container on top of nvidia-docker
	# ----------------------------------------------------------------

	# First update the system and install build-essential:

	sudo apt-get update && sudo apt-get upgrade
	sudo apt-get install build-essential

	# Next, download the NVIDIA driver

	wget http://developer.download.nvidia.com/compute/cuda/7_0/Prod/local_installers/cuda_7.0.28_linux.run

	# Extract the installers using

	chmod +x cuda_7.0.28_linux.run
	mkdir nvidia_installers
	./cuda_7.0.28_linux.run -extract=`pwd`/nvidia_installers

	# Then update the linux image to be compatible with NVIDIA's drivers:

	sudo apt-get install linux-image-extra-virtual

	# While installing the linux-image-extra-virtual, you may be prompted "What would you like to do about menu.lst?". I selected "keep the local version currently installed".

	# Now we have to disable nouveau since it conflicts with NVIDIA's kernel module. Open

	sudo vi /etc/modprobe.d/blacklist-nouveau.conf

	# and add the following lines to this file:

	blacklist nouveau
	blacklist lbm-nouveau
	options nouveau modeset=0
	alias nouveau off
	alias lbm-nouveau off

	# Back in the shell, execute the commands:

	echo options nouveau modeset=0 | sudo tee -a /etc/modprobe.d/nouveau-kms.conf
	sudo update-initramfs -u
	sudo reboot

	# After the reboot, we can finally install the driver:

	sudo apt-get install linux-source
	sudo apt-get install linux-headers-`uname -r`

	cd nvidia_installers
	sudo ./NVIDIA-Linux-x86_64-346.46.run

	# Just select the defaults for all the questions that pop up.

	# Now we can install CUDA

	sudo modprobe nvidia
	sudo ./cuda-linux64-rel-7.0.28-19326674.run
	sudo ./cuda-samples-linux-7.0.28-19326674.run

	# Follow the suggestion to add the following to your .bashrc

	# Append the CUDA 7.0 tools and libraries to the search paths.
	# ${VAR:+...} keeps any existing LD_LIBRARY_PATH and avoids a leading ':'
	# (an empty entry, which the dynamic loader treats as the current directory).
	export PATH="${PATH}:/usr/local/cuda-7.0/bin"
	export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda-7.0/lib64"

	# Now reboot

	sudo reboot

	# and log back in to check the driver version

	nvidia-smi

	# should be NVIDIA-SMI 346.46 Driver Version: 346.46

	# -----------------------------
	# install docker from main repo
	# -----------------------------

	# installing docker on aws ubuntu 14.04 gpu instance:
	# (from https://docs.docker.com/engine/installation/linux/ubuntulinux/)

	sudo apt-get update
	sudo apt-get install apt-transport-https ca-certificates
	sudo apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys 58118E89F3A912897C070ADBF76221572C52609D
	echo deb https://apt.dockerproject.org/repo ubuntu-trusty main | sudo tee /etc/apt/sources.list.d/docker.list
	sudo apt-get update
	sudo apt-get install linux-headers-3.13.0-100-generic
	sudo apt-get install linux-image-extra-$(uname -r) linux-image-extra-virtual
	sudo apt-get install docker-engine
	sudo usermod -aG docker ubuntu

	# Use a script to make nvidia-uvm show up in /dev.
	# Save the following as uvm.sh and run it with: sudo bash uvm.sh

	#!/bin/bash
	#
	# uvm.sh - load the nvidia and nvidia-uvm kernel modules and create the
	# matching device nodes under /dev.
	#
	# Usage:   sudo bash uvm.sh
	# Exits 1 if either module fails to load or if nvidia-uvm is not listed
	# in /proc/devices; otherwise exits with the status of the last node
	# creation.

	# Create a mode-666 character device node unless one is already present,
	# so the script can be re-run without mknod "File exists" errors.
	# NOTE: an existing node is left untouched even if its numbers differ.
	#   $1 - node path, $2 - major number, $3 - minor number
	make_node() {
	  [ -c "$1" ] || mknod -m 666 "$1" c "$2" "$3"
	}

	/sbin/modprobe nvidia || exit 1

	# Count the NVIDIA controllers reported by lspci (3D and VGA classes).
	# These are real pipelines: an escaped "\|" would be passed to the
	# command as a literal argument instead of connecting the stages.
	nvdevs=$(lspci | grep -i NVIDIA)
	n3d=$(printf '%s\n' "$nvdevs" | grep -c "3D controller")
	nvga=$(printf '%s\n' "$nvdevs" | grep -c "VGA compatible controller")

	# One /dev/nvidiaN node per controller: major 195, minor = controller index.
	count=$(( n3d + nvga ))
	for (( i = 0; i < count; i++ )); do
	  make_node "/dev/nvidia${i}" 195 "$i"
	done

	make_node /dev/nvidiactl 195 255

	/sbin/modprobe nvidia-uvm || exit 1

	# Look up the major number registered by the nvidia-uvm driver. Match the
	# device name exactly so other entries containing "nvidia-uvm" as a
	# substring cannot contribute extra numbers.
	major=$(awk '$2 == "nvidia-uvm" { print $1; exit }' /proc/devices)
	if [ -z "$major" ]; then
	  echo "uvm.sh: nvidia-uvm not listed in /proc/devices" >&2
	  exit 1
	fi

	make_node /dev/nvidia-uvm "$major" 0

	___________________

	# edit dockerfile and rebuild image

	# Add the following line to the Dockerfile, right after the corresponding line for main.py

	COPY mydense.py /scripts/

	# now build the image

	docker build -t rdonovan/keras-genomics:hack .

	# push it to docker hub:

	docker push rdonovan/keras-genomics:hack

	# now run

	docker run --rm --device /dev/nvidiactl --device /dev/nvidia-uvm --device /dev/nvidia0 -v $(pwd)/example:/modeldir -v $(pwd)/expt1:/datadir rorydm/keras-genomics python main.py -d /datadir -c trial2 -m /modeldir/model.py -s 1001 -y -t -e

	# it will take a minute to initialize the model, then the screen should be full of epoch test train validation metrics.
No results found