Skip to content

Instantly share code, notes, and snippets.

@link89
Created January 9, 2025 03:28
Show Gist options
  • Select an option

  • Save link89/44502931680e87de65b77551daf8835d to your computer and use it in GitHub Desktop.

Select an option

Save link89/44502931680e87de65b77551daf8835d to your computer and use it in GitHub Desktop.
An example of building DeePMD-kit 3.0+ with conda and CUDA 12 on Rocky Linux 8.5+

Summary

  • use checkpoint in build script to skip processed steps
  • create new conda activate/deactivate scripts
  • pytorch is installed from conda to support CXX11 ABI
  • tensorflow is installed from pip
#!/bin/bash
# Build script: sets up a conda environment, installs the DeePMD-kit Python
# package, builds the DeePMD-kit C++ library and LAMMPS, and writes conda
# activate/deactivate hooks. Finished steps leave a *.done marker file in the
# current working directory so that re-running the script skips them.
set -e

# reset environment
# Best-effort: a failure here (e.g. `conda` not being available before the
# anaconda module is loaded) must not abort the script under `set -e`.
conda deactivate || true
module purge

export OPENBLAS_NUM_THREADS=16

# Source trees are expected next to this script. "$0" is quoted so that a
# script path containing whitespace is not word-split before realpath sees it.
SCRIPT_PATH=$(realpath "$0")
SCRIPT_DIR=$(dirname "$SCRIPT_PATH")
deepmd_source_dir=$SCRIPT_DIR/deepmd-kit
lammps_source_dir=$SCRIPT_DIR/lammps-29Aug2024

# Toolchain modules.
module load cuda/12.1
export CONDA_OVERRIDE_CUDA=12.1 # this is essential
module load anaconda/2022.5
module load gcc/12.1
module load mpi/openmpi/4.0.3-gcc
# module load mkl/2021.1.1
# Create the conda environment once; the conda-create.done marker (looked up
# in the current working directory) makes later runs skip this step.
[ -f conda-create.done ] || {
  # The match spec is quoted so the shell never expands '*' as a filename glob.
  conda create -y -n deepmd-3-cuda12 'pytorch=*=cuda120*' python=3.11.11 -c conda-forge --override-channels
  # conda create -y -n deepmd-3-cuda12 python=3.11.11 -c conda-forge --override-channels
  touch conda-create.done
}

source activate deepmd-3-cuda12

# Source the libdeepmd deactivate hook that this script generates at its end,
# if a previous run already installed it. On a first run the file does not
# exist yet, hence the `|| true`.
source "$CONDA_PREFIX/etc/conda/deactivate.d/libdeepmd.sh" || true

# Install TensorFlow from pip once (skipped when pip-install.done exists).
[ -f pip-install.done ] || {
  # Quoted: '[and-cuda]' is a bracket glob pattern to an unquoted shell word.
  pip3 install 'tensorflow[and-cuda]'
  touch pip-install.done
}
# Print the environment contents and the interpreter that will be used.
conda list
command -v python3

# Compilers picked up from PATH for the builds below.
export CC="$(command -v gcc)"
export CXX="$(command -v g++)"
export FC="$(command -v gfortran)"
# export CUDA_HOME=$CONDA_PREFIX
export NVCC_APPEND_FLAGS='-allow-unsupported-compiler'
# Prepend the environment's lib directory. ${VAR:+...} only appends the old
# value (and its ':' separator) when it is non-empty: a trailing ':' would
# leave an empty entry, which the dynamic loader treats as the current
# working directory.
export LD_LIBRARY_PATH="$CONDA_PREFIX/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export CMAKE_PREFIX_PATH=$CONDA_PREFIX
export CAFFE2_USE_CUDNN=1
export CAFFE2_USE_CUSPARSELT=1
# to fix
# nvcc warning : incompatible redefinition for option 'compiler-bindir', the last value of this option was used
# export CUDA_HOST_COMPILER=$CC
export DP_VARIANT=cuda
export DP_ENABLE_PYTORCH=1
# Install the DeePMD-kit Python package in editable mode (skipped once the
# deepmd-py.done marker exists in the current working directory).
[ -f deepmd-py.done ] || {
  # install python module
  # `git clean -xdf` below deletes every untracked and ignored file in the
  # directory it runs in, so refuse to continue unless we really entered the
  # DeePMD-kit checkout.
  pushd "${deepmd_source_dir:?deepmd_source_dir is not set}" || exit 1
  # you may need to clean the git directory to avoid build failure
  echo "install deepmd python"
  git clean -xdf
  # Extra CMake options are passed to the pip build through CMAKE_ARGS,
  # appended to whatever the caller already exported.
  CMAKE_ARGS="$CMAKE_ARGS -DCAFFE2_USE_CUDNN=TRUE -DCAFFE2_USE_CUSPARSELT=TRUE -DUSE_CUDA_TOOLKIT=TRUE" pip install -e .
  popd
  touch deepmd-py.done
}
# Switch to the CMake module used for the C++ builds and show which binary
# is now first on PATH.
module unload cmake
module load dev/cmake/3.26.3
command -v cmake

# build c++ module
# The library is installed inside the conda environment; refuse to fall back
# to /opt/deepmd when no environment is active.
deepmd_root=${CONDA_PREFIX:?CONDA_PREFIX is not set}/opt/deepmd
echo "deepmd_root=$deepmd_root"
mkdir -p "$deepmd_root"
[ -f deepmd-c.done ] || {
  # Always start from an empty build tree. ${...:?} keeps `rm -rf` from ever
  # running against /source/build when the source directory is unset.
  deepmd_build_dir="${deepmd_source_dir:?deepmd_source_dir is not set}/source/build"
  rm -rf -- "$deepmd_build_dir" || true
  mkdir -p "$deepmd_build_dir"
  pushd "$deepmd_build_dir" || exit 1
  # export TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0"
  # Resolved in a separate assignment so that a failing `import torch` aborts
  # the script (set -e) instead of silently passing an empty prefix path.
  torch_cmake_prefix=$(python3 -c 'import torch;print(torch.utils.cmake_prefix_path)')
  cmake \
    -DUSE_TF_PYTHON_LIBS=TRUE \
    -DENABLE_PYTORCH=TRUE \
    -DENABLE_TENSORFLOW=TRUE \
    -DCMAKE_INSTALL_PREFIX="$deepmd_root" \
    -DCAFFE2_USE_CUDNN=TRUE \
    -DCAFFE2_USE_CUSPARSELT=TRUE \
    -DUSE_CUDA_TOOLKIT=TRUE \
    -DLAMMPS_SOURCE_ROOT="$lammps_source_dir" \
    -DCMAKE_PREFIX_PATH="$torch_cmake_prefix" \
    -DCUDNN_ROOT="$CONDA_PREFIX" \
    -DCUSPARSELT_ROOT="$CONDA_PREFIX" \
    ..
  make -j8
  make install
  popd
  touch deepmd-c.done
}
# install lammps
# LAMMPS is installed inside the conda environment (skipped once the
# lammps.done marker exists in the current working directory).
lammps_root=${CONDA_PREFIX:?CONDA_PREFIX is not set}/opt/lammps
[ -f lammps.done ] || {
  # Always start from an empty build tree. ${...:?} keeps `rm -rf` from ever
  # running against /build when the source directory is unset.
  lammps_build_dir="${lammps_source_dir:?lammps_source_dir is not set}/build"
  rm -rf -- "$lammps_build_dir" || true
  mkdir -p "$lammps_build_dir"
  pushd "$lammps_build_dir" || exit 1
  # https://docs.lammps.org/Packages_details.html
  cmake \
    -D PKG_PLUGIN=ON \
    -D PKG_EXTRA-FIX=ON \
    -D PKG_KSPACE=ON \
    -D PKG_MISC=ON \
    -D PKG_PLUMED=ON \
    -D LAMMPS_INSTALL_RPATH=ON \
    -D BUILD_MPI=yes \
    -D BUILD_OMP=yes \
    -D BUILD_SHARED_LIBS=yes \
    -D CMAKE_INSTALL_PREFIX="$lammps_root" \
    -D CMAKE_INSTALL_LIBDIR=lib \
    -D CMAKE_INSTALL_FULL_LIBDIR="${lammps_root}/lib" \
    ../cmake
  make -j8
  make install
  popd
  touch lammps.done
}
# generate activate script
# Scripts placed in etc/conda/activate.d are sourced by conda when the
# environment is activated. The directory is not guaranteed to exist in a
# fresh environment, so create it first.
activate_hook_dir="${CONDA_PREFIX:?CONDA_PREFIX is not set}/etc/conda/activate.d"
mkdir -p "$activate_hook_dir"
# The delimiter is quoted, so the body is written verbatim: every $VAR below
# is expanded when the hook is sourced, not while this build script runs.
cat << '__EOF' > "$activate_hook_dir/libdeepmd.sh"
export LAMMPS_PLUGIN_PATH=$CONDA_PREFIX/opt/deepmd/lib/deepmd_lmp
export CUDA_HOME=$CONDA_PREFIX
export _ADD_LD_LIBRARY_PATH=$CONDA_PREFIX/opt/deepmd/lib:$CONDA_PREFIX/lib
export _ADD_PATH=$CONDA_PREFIX/opt/lammps/bin
export LD_LIBRARY_PATH=$_ADD_LD_LIBRARY_PATH:$LD_LIBRARY_PATH
export PATH=$_ADD_PATH:$PATH
cat << EOF
This environment have cuda-toolkit included,
you don't need to load extra cuda module.
If you are to run dp train, you don't need to load extra packages.
If you are to run LAMMPS,
then you must add the following modules to your script.
module load gcc/12.1
module load mpi/openmpi/4.0.3-gcc
EOF
__EOF
# generate deactivate script
# Scripts placed in etc/conda/deactivate.d are sourced by conda when the
# environment is deactivated. The directory is not guaranteed to exist in a
# fresh environment, so create it first.
deactivate_hook_dir="${CONDA_PREFIX:?CONDA_PREFIX is not set}/etc/conda/deactivate.d"
mkdir -p "$deactivate_hook_dir"
# The delimiter is quoted, so the body is written verbatim: every $VAR below
# is expanded when the hook is sourced, not while this build script runs.
cat << '__EOF' > "$deactivate_hook_dir/libdeepmd.sh"
export LAMMPS_PLUGIN_PATH=
export CUDA_HOME=
# Only strip the prefixes recorded by the activate hook. With an empty _ADD_*
# value the pattern would be a bare ":" and the substitution would delete the
# first separator, fusing two unrelated path entries.
if [ -n "${_ADD_LD_LIBRARY_PATH:-}" ]; then
  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH/"$_ADD_LD_LIBRARY_PATH:"/}"
fi
if [ -n "${_ADD_PATH:-}" ]; then
  export PATH="${PATH/"$_ADD_PATH:"/}"
fi
export _ADD_PATH=
export _ADD_LD_LIBRARY_PATH=
__EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment