diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..ae992e3 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 +updates: + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index b8ce53b..44a422a 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -10,6 +10,6 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Build the Docker image run: docker build . --file Dockerfile --tag openfold:$(date +%s) \ No newline at end of file diff --git a/.github/workflows/undefined_names.yml b/.github/workflows/undefined_names.yml index 8ff9f24..a689751 100644 --- a/.github/workflows/undefined_names.yml +++ b/.github/workflows/undefined_names.yml @@ -4,8 +4,8 @@ jobs: undefined_names: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 - run: pip install --upgrade pip - run: pip install flake8 - run: flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics diff --git a/Dockerfile b/Dockerfile index 4a9f647..c7d9483 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04 +FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04 # metainformation LABEL org.opencontainers.image.version = "1.0.0" @@ -13,24 +13,24 @@ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/ RUN apt-get update && apt-get install -y wget libxml2 cuda-minimal-build-11-3 libcusparse-dev-11-3 libcublas-dev-11-3 libcusolver-dev-11-3 git RUN wget -P /tmp \ - "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" \ - && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \ - && rm /tmp/Miniconda3-latest-Linux-x86_64.sh + "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh" \ + && bash /tmp/Miniforge3-Linux-x86_64.sh -b -p /opt/conda \ + && rm /tmp/Miniforge3-Linux-x86_64.sh ENV PATH /opt/conda/bin:$PATH COPY environment.yml /opt/openfold/environment.yml # installing into the base environment since the docker container wont do anything other than run openfold -RUN conda env update -n base --file /opt/openfold/environment.yml && conda clean --all +RUN mamba env update -n base --file /opt/openfold/environment.yml && mamba clean --all +# ENV (not RUN export) so the conda lib path persists into later layers and the running container +ENV LD_LIBRARY_PATH=/opt/conda/lib:${LD_LIBRARY_PATH} COPY openfold /opt/openfold/openfold COPY scripts /opt/openfold/scripts COPY run_pretrained_openfold.py /opt/openfold/run_pretrained_openfold.py COPY train_openfold.py /opt/openfold/train_openfold.py COPY setup.py /opt/openfold/setup.py -COPY lib/openmm.patch /opt/openfold/lib/openmm.patch RUN wget -q -P /opt/openfold/openfold/resources \ https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt -RUN patch -p0 -d /opt/conda/lib/python3.9/site-packages/ < 
/opt/openfold/lib/openmm.patch WORKDIR /opt/openfold RUN python3 setup.py install diff --git a/README.md b/README.md index d57b945..ed6d114 100644 --- a/README.md +++ b/README.md @@ -48,37 +48,19 @@ and one of {`jackhmmer`, [MMseqs2](https://github.com/soedinglab/mmseqs2) (night installed on on your system. You'll need `git-lfs` to download OpenFold parameters. Finally, some download scripts require `aria2c` and `aws`. -For convenience, we provide a script that installs Miniconda locally, creates a -`conda` virtual environment, installs all Python dependencies, and downloads -useful resources, including both sets of model parameters. Run: +This package is currently supported for CUDA 11 and PyTorch 1.12. -```bash -scripts/install_third_party_dependencies.sh -``` +To install: +1. Clone the repository, e.g. `git clone https://github.com/aqlaboratory/openfold.git` +1. From the `openfold` repo: + - Create a [Mamba](https://github.com/conda-forge/miniforge/releases/latest/download/) environment, e.g. + `mamba env create -n openfold_env -f environment.yml` + Mamba is recommended as the dependencies required by OpenFold are quite large and mamba can speed up the process. + - Activate the environment, e.g. `conda activate openfold_env` +1. Run `scripts/install_third_party_dependencies.sh` to configure kernels and folding resources. -To activate the environment, run: +For some systems, it may help to prepend the Conda environment library path to `$LD_LIBRARY_PATH`. The `install_third_party_dependencies.sh` script does this once, but you may need this for each bash instance. 
-```bash -source scripts/activate_conda_env.sh -``` - -To deactivate it, run: - -```bash -source scripts/deactivate_conda_env.sh -``` - -With the environment active, compile OpenFold's CUDA kernels with - -```bash -python3 setup.py install -``` - -To install the HH-suite to `/usr/bin`, run - -```bash -# scripts/install_hh_suite.sh -``` ## Usage diff --git a/environment.yml b/environment.yml index 595d7da..0aac956 100644 --- a/environment.yml +++ b/environment.yml @@ -1,32 +1,37 @@ -name: openfold_venv +name: openfold-venv channels: - conda-forge - bioconda - pytorch dependencies: - - conda-forge::python=3.9 - - conda-forge::setuptools=59.5.0 - - conda-forge::pip - - conda-forge::openmm=7.5.1 - - conda-forge::pdbfixer - - conda-forge::cudatoolkit==11.3.* + - python=3.9 + - libgcc=7.2 + - setuptools=59.5.0 + - pip + - openmm=7.7 + - pdbfixer + - cudatoolkit==11.3.* + - pytorch-lightning==1.5.10 + - biopython==1.79 + - pandas==2.0.2 + - numpy==1.21 + - PyYAML==5.4.1 + - requests + - scipy==1.7 + - tqdm==4.62.2 + - typing-extensions==3.10 + - wandb==0.12.21 + - modelcif==0.7 + - awscli + - ml-collections + - aria2 + - git - bioconda::hmmer==3.3.2 - bioconda::hhsuite==3.3.0 - bioconda::kalign2==2.04 - pytorch::pytorch=1.12.* - pip: - - biopython==1.79 - deepspeed==0.5.10 - dm-tree==0.1.6 - - ml-collections==0.1.0 - - pandas==2.0.2 - - numpy==1.21.2 - - PyYAML==5.4.1 - - requests==2.26.0 - - scipy==1.7.1 - - tqdm==4.62.2 - - typing-extensions==3.10.0.2 - - pytorch_lightning==1.5.10 - - wandb==0.12.21 - - modelcif==0.7 - git+https://github.com/NVIDIA/dllogger.git + - git+https://github.com/Dao-AILab/flash-attention.git@5b838a8 diff --git a/lib/openmm.patch b/lib/openmm.patch deleted file mode 100644 index cc09e9c..0000000 --- a/lib/openmm.patch +++ /dev/null @@ -1,42 +0,0 @@ -Index: simtk/openmm/app/topology.py -=================================================================== ---- simtk.orig/openmm/app/topology.py -+++ simtk/openmm/app/topology.py -@@ -356,19 +356,35 
@@ - def isCyx(res): - names = [atom.name for atom in res._atoms] - return 'SG' in names and 'HG' not in names -+ # This function is used to prevent multiple di-sulfide bonds from being -+ # assigned to a given atom. This is a DeepMind modification. -+ def isDisulfideBonded(atom): -+ for b in self._bonds: -+ if (atom in b and b[0].name == 'SG' and -+ b[1].name == 'SG'): -+ return True -+ -+ return False - - cyx = [res for res in self.residues() if res.name == 'CYS' and isCyx(res)] - atomNames = [[atom.name for atom in res._atoms] for res in cyx] - for i in range(len(cyx)): - sg1 = cyx[i]._atoms[atomNames[i].index('SG')] - pos1 = positions[sg1.index] -+ candidate_distance, candidate_atom = 0.3*nanometers, None - for j in range(i): - sg2 = cyx[j]._atoms[atomNames[j].index('SG')] - pos2 = positions[sg2.index] - delta = [x-y for (x,y) in zip(pos1, pos2)] - distance = sqrt(delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2]) -- if distance < 0.3*nanometers: -- self.addBond(sg1, sg2) -+ if distance < candidate_distance and not isDisulfideBonded(sg2): -+ candidate_distance = distance -+ candidate_atom = sg2 -+ # Assign bond to closest pair. 
-+ if candidate_atom: -+ self.addBond(sg1, candidate_atom) -+ -+ - - class Chain(object): - """A Chain object represents a chain within a Topology.""" diff --git a/openfold/np/relax/amber_minimize.py b/openfold/np/relax/amber_minimize.py index c32a44b..02816bb 100644 --- a/openfold/np/relax/amber_minimize.py +++ b/openfold/np/relax/amber_minimize.py @@ -28,18 +28,10 @@ import openfold.utils.loss as loss from openfold.np.relax import cleanup, utils import ml_collections import numpy as np -try: - # openmm >= 7.6 - import openmm - from openmm import unit - from openmm import app as openmm_app - from openmm.app.internal.pdbstructure import PdbStructure -except ImportError: - # openmm < 7.6 (requires DeepMind patch) - from simtk import openmm - from simtk import unit - from simtk.openmm import app as openmm_app - from simtk.openmm.app.internal.pdbstructure import PdbStructure +import openmm +from openmm import unit +from openmm import app as openmm_app +from openmm.app.internal.pdbstructure import PdbStructure ENERGY = unit.kilocalories_per_mole LENGTH = unit.angstroms diff --git a/openfold/np/relax/cleanup.py b/openfold/np/relax/cleanup.py index 472068b..a435692 100644 --- a/openfold/np/relax/cleanup.py +++ b/openfold/np/relax/cleanup.py @@ -20,14 +20,8 @@ cases like removing chains of length one (see clean_structure). 
import io import pdbfixer -try: - # openmm >= 7.6 - from openmm import app - from openmm.app import element -except ImportError: - # openmm < 7.6 (requires DeepMind patch) - from simtk.openmm import app - from simtk.openmm.app import element +from openmm import app +from openmm.app import element def fix_pdb(pdbfile, alterations_info): diff --git a/openfold/np/relax/utils.py b/openfold/np/relax/utils.py index b844953..fc19a91 100644 --- a/openfold/np/relax/utils.py +++ b/openfold/np/relax/utils.py @@ -18,14 +18,8 @@ import io from openfold.np import residue_constants from Bio import PDB import numpy as np -try: - # openmm >= 7.6 - from openmm import app as openmm_app - from openmm.app.internal.pdbstructure import PdbStructure -except ImportError: - # openmm < 7.6 (requires DeepMind patch) - from simtk.openmm import app as openmm_app - from simtk.openmm.app.internal.pdbstructure import PdbStructure +from openmm import app as openmm_app +from openmm.app.internal.pdbstructure import PdbStructure def overwrite_pdb_coordinates(pdb_str: str, pos) -> str: diff --git a/scripts/install_third_party_dependencies.sh b/scripts/install_third_party_dependencies.sh index e334159..3756a87 100755 --- a/scripts/install_third_party_dependencies.sh +++ b/scripts/install_third_party_dependencies.sh @@ -1,49 +1,18 @@ #!/bin/bash -CONDA_INSTALL_URL=${CONDA_INSTALL_URL:-"https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"} - -source scripts/vars.sh - -# Install Miniconda locally -rm -rf lib/conda -rm -f /tmp/Miniconda3-latest-Linux-x86_64.sh -wget -P /tmp \ - "${CONDA_INSTALL_URL}" \ - && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p lib/conda \ - && rm /tmp/Miniconda3-latest-Linux-x86_64.sh - -# Grab conda-only packages -export PATH=lib/conda/bin:$PATH -lib/conda/bin/python3 -m pip install nvidia-pyindex -conda env create --name=${ENV_NAME} -f environment.yml -source scripts/activate_conda_env.sh - -echo "Attempting to install FlashAttention" -git clone 
https://github.com/HazyResearch/flash-attention -CUR_DIR=$PWD -cd flash-attention -git checkout 5b838a8bef -python3 setup.py install -cd $CUR_DIR - -# Install DeepMind's OpenMM patch -OPENFOLD_DIR=$PWD -pushd lib/conda/envs/$ENV_NAME/lib/python3.9/site-packages/ \ - && patch -p0 < $OPENFOLD_DIR/lib/openmm.patch \ - && popd # Download folding resources -wget --no-check-certificate -P openfold/resources \ +wget -N --no-check-certificate -P openfold/resources \ https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt # Certain tests need access to this file mkdir -p tests/test_data/alphafold/common ln -rs openfold/resources/stereo_chemical_props.txt tests/test_data/alphafold/common -echo "Downloading OpenFold parameters..." -bash scripts/download_openfold_params.sh openfold/resources - -echo "Downloading AlphaFold parameters..." -bash scripts/download_alphafold_params.sh openfold/resources - # Decompress test data gunzip -c tests/test_data/sample_feats.pickle.gz > tests/test_data/sample_feats.pickle + +python setup.py install + +export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH +# This setting is used to fix a worker assignment issue during data loading +conda env config vars set KMP_AFFINITY=none