- use checkpoint in build script to skip processed steps
- create new conda activate/deactivate scripts
- pytorch is installed from conda to support CXX11 ABI
- tensorflow is installed from pip
Created
January 9, 2025 03:28
-
-
Save link89/44502931680e87de65b77551daf8835d to your computer and use it in GitHub Desktop.
An example of building DeePMD-kit 3.0+ with conda and CUDA 12 on Rocky Linux 8.5+
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # Abort as soon as any unguarded command fails. | |
| set -e | |
| # reset environment: leave any active conda env (failure is ignored) and unload all modules | |
| conda deactivate || true | |
| module purge | |
| export OPENBLAS_NUM_THREADS=16 | |
| # Source trees are looked up next to this script. | |
| # "$0" is quoted so that a script path containing spaces is not word-split. | |
| SCRIPT_PATH=$(realpath "$0") | |
| SCRIPT_DIR=$(dirname "$SCRIPT_PATH") | |
| deepmd_source_dir=$SCRIPT_DIR/deepmd-kit | |
| lammps_source_dir=$SCRIPT_DIR/lammps-29Aug2024 | |
| # Toolchain modules used by the rest of the script. | |
| module load cuda/12.1 | |
| export CONDA_OVERRIDE_CUDA=12.1 # this is essential | |
| module load anaconda/2022.5 | |
| module load gcc/12.1 | |
| module load mpi/openmpi/4.0.3-gcc | |
| # module load mkl/2021.1.1 | |
| # Create the conda environment once; conda-create.done in the working directory marks the step as done. | |
| [ -f conda-create.done ] || { | |
| # The pytorch match spec is quoted so the shell cannot glob-expand its '*' characters. | |
| conda create -y -n deepmd-3-cuda12 'pytorch=*=cuda120*' python=3.11.11 -c conda-forge --override-channels | |
| # conda create -y -n deepmd-3-cuda12 python=3.11.11 -c conda-forge --override-channels | |
| touch conda-create.done | |
| } | |
| source activate deepmd-3-cuda12 | |
| # Run the libdeepmd deactivate hook if an earlier run generated one; a missing file is tolerated. | |
| source "$CONDA_PREFIX/etc/conda/deactivate.d/libdeepmd.sh" || true | |
| # Install TensorFlow from pip once; pip-install.done marks the step as done. | |
| [ -f pip-install.done ] || { | |
| # Quoted because '[and-cuda]' is a bracket glob pattern to the shell. | |
| pip3 install 'tensorflow[and-cuda]' | |
| touch pip-install.done | |
| } | |
| # Print the environment contents and the python in use, for the build log. | |
| conda list | |
| which python3 | |
| # Point the build at the compilers currently first on PATH. | |
| export CC=$(command -v gcc) | |
| export CXX=$(command -v g++) | |
| export FC=$(command -v gfortran) | |
| # export CUDA_HOME=$CONDA_PREFIX | |
| export NVCC_APPEND_FLAGS='-allow-unsupported-compiler' | |
| # Prepend the env's lib dir. The ':' is only added when LD_LIBRARY_PATH is non-empty, | |
| # because a trailing empty entry makes the dynamic loader search the current directory. | |
| export LD_LIBRARY_PATH=$CONDA_PREFIX/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} | |
| export CMAKE_PREFIX_PATH=$CONDA_PREFIX | |
| export CAFFE2_USE_CUDNN=1 | |
| export CAFFE2_USE_CUSPARSELT=1 | |
| # to fix | |
| # nvcc warning : incompatible redefinition for option 'compiler-bindir', the last value of this option was used | |
| # export CUDA_HOST_COMPILER=$CC | |
| export DP_VARIANT=cuda | |
| export DP_ENABLE_PYTORCH=1 | |
| # Install the deepmd python package once; deepmd-py.done marks the step as done. | |
| [ -f deepmd-py.done ] || { | |
| # install python module | |
| # The path is quoted so pushd receives it as a single argument before the destructive clean below. | |
| pushd "$deepmd_source_dir" | |
| # you may need to clean the git directory to avoid build failure | |
| echo "install deepmd python" | |
| # Removes every untracked and ignored file in the source tree. | |
| git clean -xdf | |
| CMAKE_ARGS="$CMAKE_ARGS -DCAFFE2_USE_CUDNN=TRUE -DCAFFE2_USE_CUSPARSELT=TRUE -DUSE_CUDA_TOOLKIT=TRUE" pip install -e . | |
| popd | |
| touch deepmd-py.done | |
| } | |
| # Switch to the cmake module used for the C++ builds and print which binary is picked up. | |
| module unload cmake | |
| module load dev/cmake/3.26.3 | |
| which cmake | |
| # build c++ module | |
| deepmd_root=$CONDA_PREFIX/opt/deepmd | |
| echo "deepmd_root=$deepmd_root" | |
| mkdir -p "$deepmd_root" | |
| # Configure, build and install once; deepmd-c.done marks the step as done. | |
| [ -f deepmd-c.done ] || { | |
| # Start from a clean build directory. ':?' aborts instead of deleting '/source/build' | |
| # if deepmd_source_dir is ever empty; removal itself stays best-effort. | |
| rm -rf "${deepmd_source_dir:?}/source/build" || true | |
| mkdir -p "$deepmd_source_dir/source/build" | |
| pushd "$deepmd_source_dir/source/build" | |
| # export TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" | |
| # CMAKE_PREFIX_PATH is taken from the installed torch package so cmake can find its config files. | |
| cmake -DUSE_TF_PYTHON_LIBS=TRUE -DENABLE_PYTORCH=TRUE -DENABLE_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX="$deepmd_root" -DCAFFE2_USE_CUDNN=TRUE -DCAFFE2_USE_CUSPARSELT=TRUE -DUSE_CUDA_TOOLKIT=TRUE -DLAMMPS_SOURCE_ROOT="$lammps_source_dir" -DCMAKE_PREFIX_PATH="$(python3 -c 'import torch;print(torch.utils.cmake_prefix_path)')" -DCUDNN_ROOT="$CONDA_PREFIX" -DCUSPARSELT_ROOT="$CONDA_PREFIX" .. | |
| make -j8 | |
| make install | |
| popd | |
| touch deepmd-c.done | |
| } | |
| # install lammps | |
| lammps_root=$CONDA_PREFIX/opt/lammps | |
| # Configure, build and install once; lammps.done marks the step as done. | |
| [ -f lammps.done ] || { | |
| # Start from a clean build directory. ':?' aborts instead of deleting '/build' | |
| # if lammps_source_dir is ever empty; removal itself stays best-effort. | |
| rm -rf "${lammps_source_dir:?}/build" || true | |
| mkdir -p "$lammps_source_dir/build" | |
| pushd "$lammps_source_dir/build" | |
| # https://docs.lammps.org/Packages_details.html | |
| cmake -D PKG_PLUGIN=ON -D PKG_EXTRA-FIX=ON -D PKG_KSPACE=ON -D PKG_MISC=ON -D PKG_PLUMED=ON -D LAMMPS_INSTALL_RPATH=ON -D BUILD_MPI=yes -D BUILD_OMP=yes -D BUILD_SHARED_LIBS=yes -D CMAKE_INSTALL_PREFIX="$lammps_root" -D CMAKE_INSTALL_LIBDIR=lib -D CMAKE_INSTALL_FULL_LIBDIR="${lammps_root}/lib" ../cmake | |
| make -j8 | |
| make install | |
| popd | |
| touch lammps.done | |
| } | |
| # generate activate script | |
| # The hook directory is created first so the redirection below cannot fail on an env that lacks it. | |
| mkdir -p "$CONDA_PREFIX/etc/conda/activate.d" | |
| # The delimiter is quoted, so the body is written verbatim: every variable in it is | |
| # expanded when the hook is sourced, not while this build script runs. | |
| # The inner 'cat << EOF' is part of the hook and prints the usage note when it is sourced. | |
| cat << '__EOF' > "$CONDA_PREFIX/etc/conda/activate.d/libdeepmd.sh" | |
| export LAMMPS_PLUGIN_PATH=$CONDA_PREFIX/opt/deepmd/lib/deepmd_lmp | |
| export CUDA_HOME=$CONDA_PREFIX | |
| export _ADD_LD_LIBRARY_PATH=$CONDA_PREFIX/opt/deepmd/lib:$CONDA_PREFIX/lib | |
| export _ADD_PATH=$CONDA_PREFIX/opt/lammps/bin | |
| export LD_LIBRARY_PATH=$_ADD_LD_LIBRARY_PATH:$LD_LIBRARY_PATH | |
| export PATH=$_ADD_PATH:$PATH | |
| cat << EOF | |
| This environment have cuda-toolkit included, | |
| you don't need to load extra cuda module. | |
| If you are to run dp train, you don't need to load extra packages. | |
| If you are to run LAMMPS, | |
| then you must add the following modules to your script. | |
| module load gcc/12.1 | |
| module load mpi/openmpi/4.0.3-gcc | |
| EOF | |
| __EOF | |
| # generate deactivate script | |
| # The hook directory is created first so the redirection below cannot fail on an env that lacks it. | |
| mkdir -p "$CONDA_PREFIX/etc/conda/deactivate.d" | |
| # The delimiter is quoted, so the body is written verbatim and expanded only when the hook is sourced. | |
| # The hook strips the "<added>:" prefixes recorded in _ADD_PATH / _ADD_LD_LIBRARY_PATH. | |
| # Each removal is guarded: with an empty _ADD_* value the pattern would be a bare ':' | |
| # and the first separator of the path variable would be deleted. | |
| cat << '__EOF' > "$CONDA_PREFIX/etc/conda/deactivate.d/libdeepmd.sh" | |
| export LAMMPS_PLUGIN_PATH= | |
| export CUDA_HOME= | |
| if [ -n "$_ADD_LD_LIBRARY_PATH" ]; then | |
| export LD_LIBRARY_PATH="${LD_LIBRARY_PATH/"$_ADD_LD_LIBRARY_PATH:"/}" | |
| fi | |
| if [ -n "$_ADD_PATH" ]; then | |
| export PATH="${PATH/"$_ADD_PATH:"/}" | |
| fi | |
| export _ADD_PATH= | |
| export _ADD_LD_LIBRARY_PATH= | |
| __EOF |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment