Merge branch 'main' into multimer
This commit is contained in:
commit
5eacd8b689
|
@ -0,0 +1,7 @@
|
|||
version: 2
|
||||
updates:
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
|
@ -10,6 +10,6 @@ jobs:
|
|||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build the Docker image
|
||||
run: docker build . --file Dockerfile --tag openfold:$(date +%s)
|
|
@ -4,8 +4,8 @@ jobs:
|
|||
undefined_names:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/setup-python@v2
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
- run: pip install --upgrade pip
|
||||
- run: pip install flake8
|
||||
- run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
||||
|
|
13
Dockerfile
13
Dockerfile
|
@ -1,4 +1,4 @@
|
|||
FROM nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu18.04
|
||||
FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04
|
||||
|
||||
# metainformation
|
||||
LABEL org.opencontainers.image.version = "1.0.0"
|
||||
|
@ -13,24 +13,23 @@ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/
|
|||
|
||||
RUN apt-get update && apt-get install -y wget libxml2 cuda-minimal-build-11-3 libcusparse-dev-11-3 libcublas-dev-11-3 libcusolver-dev-11-3 git
|
||||
RUN wget -P /tmp \
|
||||
"https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" \
|
||||
&& bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
|
||||
&& rm /tmp/Miniconda3-latest-Linux-x86_64.sh
|
||||
"https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh" \
|
||||
&& bash /tmp/Miniforge3-Linux-x86_64.sh -b -p /opt/conda \
|
||||
&& rm /tmp/Miniforge3-Linux-x86_64.sh
|
||||
ENV PATH /opt/conda/bin:$PATH
|
||||
|
||||
COPY environment.yml /opt/openfold/environment.yml
|
||||
|
||||
# Install into the base environment, since the Docker container won't do anything other than run OpenFold.
|
||||
RUN conda env update -n base --file /opt/openfold/environment.yml && conda clean --all
|
||||
RUN mamba env update -n base --file /opt/openfold/environment.yml && mamba clean --all
|
||||
RUN export LD_LIBRARY_PATH=${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}
|
||||
|
||||
COPY openfold /opt/openfold/openfold
|
||||
COPY scripts /opt/openfold/scripts
|
||||
COPY run_pretrained_openfold.py /opt/openfold/run_pretrained_openfold.py
|
||||
COPY train_openfold.py /opt/openfold/train_openfold.py
|
||||
COPY setup.py /opt/openfold/setup.py
|
||||
COPY lib/openmm.patch /opt/openfold/lib/openmm.patch
|
||||
RUN wget -q -P /opt/openfold/openfold/resources \
|
||||
https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
|
||||
RUN patch -p0 -d /opt/conda/lib/python3.9/site-packages/ < /opt/openfold/lib/openmm.patch
|
||||
WORKDIR /opt/openfold
|
||||
RUN python3 setup.py install
|
||||
|
|
38
README.md
38
README.md
|
@ -48,37 +48,19 @@ and one of {`jackhmmer`, [MMseqs2](https://github.com/soedinglab/mmseqs2) (night
|
|||
installed on your system. You'll need `git-lfs` to download OpenFold parameters.
|
||||
Finally, some download scripts require `aria2c` and `aws`.
|
||||
|
||||
For convenience, we provide a script that installs Miniconda locally, creates a
|
||||
`conda` virtual environment, installs all Python dependencies, and downloads
|
||||
useful resources, including both sets of model parameters. Run:
|
||||
This package currently supports CUDA 11 and PyTorch 1.12.
|
||||
|
||||
```bash
|
||||
scripts/install_third_party_dependencies.sh
|
||||
```
|
||||
To install:
|
||||
1. Clone the repository, e.g. `git clone https://github.com/aqlaboratory/openfold.git`
|
||||
1. From the `openfold` repo:
|
||||
- Create a [Mamba](https://github.com/conda-forge/miniforge/releases/latest/download/) environment, e.g.
|
||||
`mamba env create -n openfold_env -f environment.yml`
|
||||
Mamba is recommended as the dependencies required by OpenFold are quite large and mamba can speed up the process.
|
||||
- Activate the environment, e.g. `conda activate openfold_env`
|
||||
1. Run `scripts/install_third_party_dependencies.sh` to configure kernels and folding resources.
|
||||
|
||||
To activate the environment, run:
|
||||
On some systems, it may help to prepend the Conda environment's library path to `$LD_LIBRARY_PATH`. The `install_third_party_dependencies.sh` script does this once, but you may need to repeat it in each new bash session.
|
||||
|
||||
```bash
|
||||
source scripts/activate_conda_env.sh
|
||||
```
|
||||
|
||||
To deactivate it, run:
|
||||
|
||||
```bash
|
||||
source scripts/deactivate_conda_env.sh
|
||||
```
|
||||
|
||||
With the environment active, compile OpenFold's CUDA kernels with
|
||||
|
||||
```bash
|
||||
python3 setup.py install
|
||||
```
|
||||
|
||||
To install the HH-suite to `/usr/bin`, run
|
||||
|
||||
```bash
|
||||
# scripts/install_hh_suite.sh
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
|
|
|
@ -1,32 +1,37 @@
|
|||
name: openfold_venv
|
||||
name: openfold-venv
|
||||
channels:
|
||||
- conda-forge
|
||||
- bioconda
|
||||
- pytorch
|
||||
dependencies:
|
||||
- conda-forge::python=3.9
|
||||
- conda-forge::setuptools=59.5.0
|
||||
- conda-forge::pip
|
||||
- conda-forge::openmm=7.5.1
|
||||
- conda-forge::pdbfixer
|
||||
- conda-forge::cudatoolkit==11.3.*
|
||||
- python=3.9
|
||||
- libgcc=7.2
|
||||
- setuptools=59.5.0
|
||||
- pip
|
||||
- openmm=7.7
|
||||
- pdbfixer
|
||||
- cudatoolkit==11.3.*
|
||||
- pytorch-lightning==1.5.10
|
||||
- biopython==1.79
|
||||
- pandas==2.0.2
|
||||
- numpy==1.21
|
||||
- PyYAML==5.4.1
|
||||
- requests
|
||||
- scipy==1.7
|
||||
- tqdm==4.62.2
|
||||
- typing-extensions==3.10
|
||||
- wandb==0.12.21
|
||||
- modelcif==0.7
|
||||
- awscli
|
||||
- ml-collections
|
||||
- aria2
|
||||
- git
|
||||
- bioconda::hmmer==3.3.2
|
||||
- bioconda::hhsuite==3.3.0
|
||||
- bioconda::kalign2==2.04
|
||||
- pytorch::pytorch=1.12.*
|
||||
- pip:
|
||||
- biopython==1.79
|
||||
- deepspeed==0.5.10
|
||||
- dm-tree==0.1.6
|
||||
- ml-collections==0.1.0
|
||||
- pandas==2.0.2
|
||||
- numpy==1.21.2
|
||||
- PyYAML==5.4.1
|
||||
- requests==2.26.0
|
||||
- scipy==1.7.1
|
||||
- tqdm==4.62.2
|
||||
- typing-extensions==3.10.0.2
|
||||
- pytorch_lightning==1.5.10
|
||||
- wandb==0.12.21
|
||||
- modelcif==0.7
|
||||
- git+https://github.com/NVIDIA/dllogger.git
|
||||
- git+https://github.com/Dao-AILab/flash-attention.git@5b838a8
|
||||
|
|
|
@ -1,42 +0,0 @@
|
|||
Index: simtk/openmm/app/topology.py
|
||||
===================================================================
|
||||
--- simtk.orig/openmm/app/topology.py
|
||||
+++ simtk/openmm/app/topology.py
|
||||
@@ -356,19 +356,35 @@
|
||||
def isCyx(res):
|
||||
names = [atom.name for atom in res._atoms]
|
||||
return 'SG' in names and 'HG' not in names
|
||||
+ # This function is used to prevent multiple di-sulfide bonds from being
|
||||
+ # assigned to a given atom. This is a DeepMind modification.
|
||||
+ def isDisulfideBonded(atom):
|
||||
+ for b in self._bonds:
|
||||
+ if (atom in b and b[0].name == 'SG' and
|
||||
+ b[1].name == 'SG'):
|
||||
+ return True
|
||||
+
|
||||
+ return False
|
||||
|
||||
cyx = [res for res in self.residues() if res.name == 'CYS' and isCyx(res)]
|
||||
atomNames = [[atom.name for atom in res._atoms] for res in cyx]
|
||||
for i in range(len(cyx)):
|
||||
sg1 = cyx[i]._atoms[atomNames[i].index('SG')]
|
||||
pos1 = positions[sg1.index]
|
||||
+ candidate_distance, candidate_atom = 0.3*nanometers, None
|
||||
for j in range(i):
|
||||
sg2 = cyx[j]._atoms[atomNames[j].index('SG')]
|
||||
pos2 = positions[sg2.index]
|
||||
delta = [x-y for (x,y) in zip(pos1, pos2)]
|
||||
distance = sqrt(delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2])
|
||||
- if distance < 0.3*nanometers:
|
||||
- self.addBond(sg1, sg2)
|
||||
+ if distance < candidate_distance and not isDisulfideBonded(sg2):
|
||||
+ candidate_distance = distance
|
||||
+ candidate_atom = sg2
|
||||
+ # Assign bond to closest pair.
|
||||
+ if candidate_atom:
|
||||
+ self.addBond(sg1, candidate_atom)
|
||||
+
|
||||
+
|
||||
|
||||
class Chain(object):
|
||||
"""A Chain object represents a chain within a Topology."""
|
|
@ -28,18 +28,10 @@ import openfold.utils.loss as loss
|
|||
from openfold.np.relax import cleanup, utils
|
||||
import ml_collections
|
||||
import numpy as np
|
||||
try:
|
||||
# openmm >= 7.6
|
||||
import openmm
|
||||
from openmm import unit
|
||||
from openmm import app as openmm_app
|
||||
from openmm.app.internal.pdbstructure import PdbStructure
|
||||
except ImportError:
|
||||
# openmm < 7.6 (requires DeepMind patch)
|
||||
from simtk import openmm
|
||||
from simtk import unit
|
||||
from simtk.openmm import app as openmm_app
|
||||
from simtk.openmm.app.internal.pdbstructure import PdbStructure
|
||||
import openmm
|
||||
from openmm import unit
|
||||
from openmm import app as openmm_app
|
||||
from openmm.app.internal.pdbstructure import PdbStructure
|
||||
|
||||
ENERGY = unit.kilocalories_per_mole
|
||||
LENGTH = unit.angstroms
|
||||
|
|
|
@ -20,14 +20,8 @@ cases like removing chains of length one (see clean_structure).
|
|||
import io
|
||||
|
||||
import pdbfixer
|
||||
try:
|
||||
# openmm >= 7.6
|
||||
from openmm import app
|
||||
from openmm.app import element
|
||||
except ImportError:
|
||||
# openmm < 7.6 (requires DeepMind patch)
|
||||
from simtk.openmm import app
|
||||
from simtk.openmm.app import element
|
||||
from openmm import app
|
||||
from openmm.app import element
|
||||
|
||||
|
||||
def fix_pdb(pdbfile, alterations_info):
|
||||
|
|
|
@ -18,14 +18,8 @@ import io
|
|||
from openfold.np import residue_constants
|
||||
from Bio import PDB
|
||||
import numpy as np
|
||||
try:
|
||||
# openmm >= 7.6
|
||||
from openmm import app as openmm_app
|
||||
from openmm.app.internal.pdbstructure import PdbStructure
|
||||
except ImportError:
|
||||
# openmm < 7.6 (requires DeepMind patch)
|
||||
from simtk.openmm import app as openmm_app
|
||||
from simtk.openmm.app.internal.pdbstructure import PdbStructure
|
||||
from openmm import app as openmm_app
|
||||
from openmm.app.internal.pdbstructure import PdbStructure
|
||||
|
||||
|
||||
def overwrite_pdb_coordinates(pdb_str: str, pos) -> str:
|
||||
|
|
|
@ -1,49 +1,18 @@
|
|||
#!/bin/bash
|
||||
CONDA_INSTALL_URL=${CONDA_INSTALL_URL:-"https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"}
|
||||
|
||||
source scripts/vars.sh
|
||||
|
||||
# Install Miniconda locally
|
||||
rm -rf lib/conda
|
||||
rm -f /tmp/Miniconda3-latest-Linux-x86_64.sh
|
||||
wget -P /tmp \
|
||||
"${CONDA_INSTALL_URL}" \
|
||||
&& bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p lib/conda \
|
||||
&& rm /tmp/Miniconda3-latest-Linux-x86_64.sh
|
||||
|
||||
# Grab conda-only packages
|
||||
export PATH=lib/conda/bin:$PATH
|
||||
lib/conda/bin/python3 -m pip install nvidia-pyindex
|
||||
conda env create --name=${ENV_NAME} -f environment.yml
|
||||
source scripts/activate_conda_env.sh
|
||||
|
||||
echo "Attempting to install FlashAttention"
|
||||
git clone https://github.com/HazyResearch/flash-attention
|
||||
CUR_DIR=$PWD
|
||||
cd flash-attention
|
||||
git checkout 5b838a8bef
|
||||
python3 setup.py install
|
||||
cd $CUR_DIR
|
||||
|
||||
# Install DeepMind's OpenMM patch
|
||||
OPENFOLD_DIR=$PWD
|
||||
pushd lib/conda/envs/$ENV_NAME/lib/python3.9/site-packages/ \
|
||||
&& patch -p0 < $OPENFOLD_DIR/lib/openmm.patch \
|
||||
&& popd
|
||||
|
||||
# Download folding resources
|
||||
wget --no-check-certificate -P openfold/resources \
|
||||
wget -N --no-check-certificate -P openfold/resources \
|
||||
https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
|
||||
|
||||
# Certain tests need access to this file
|
||||
mkdir -p tests/test_data/alphafold/common
|
||||
ln -rs openfold/resources/stereo_chemical_props.txt tests/test_data/alphafold/common
|
||||
|
||||
echo "Downloading OpenFold parameters..."
|
||||
bash scripts/download_openfold_params.sh openfold/resources
|
||||
|
||||
echo "Downloading AlphaFold parameters..."
|
||||
bash scripts/download_alphafold_params.sh openfold/resources
|
||||
|
||||
# Decompress test data
|
||||
gunzip -c tests/test_data/sample_feats.pickle.gz > tests/test_data/sample_feats.pickle
|
||||
|
||||
python setup.py install
|
||||
|
||||
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
|
||||
# This setting is used to fix a worker assignment issue during data loading
|
||||
conda env config vars set KMP_AFFINITY=none
|
||||
|
|
Loading…
Reference in New Issue