diff --git a/modules/io/doc/io.rst b/modules/io/doc/io.rst index 10be9a05532493ccaf23afa06cd49be89a8887a1..39a713dc980ad887052418b4d4b4667fdf83bd4e 100644 --- a/modules/io/doc/io.rst +++ b/modules/io/doc/io.rst @@ -89,12 +89,37 @@ behaviour. To get an entity equivalent to one loaded with :func:`LoadPDB`, set the `profile` and `process` arguments as follows: + .. code-block:: python + + with open('protein.pdb') as pdb_fd: + pdb_str = pdb_fd.read() + ent = io.PDBStrToEntity(pdb_str, ost.io.profiles['DEFAULT'], True) + +Loading Molecular Structures From Remote Repositories +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`LoadPDB` already provides access to selected structural databases on +the internet when enabling the *remote* flag. Predefined +:class:`ost.io.remote.RemoteRepository` objects are available as + .. code-block:: python - with open('protein.pdb') as pdb_fd: - pdb_str = pdb.read() - ent = io.PDBStrToEntity(pdb_str, ost.io.profiles['DEFAULT'], True) + from ost.io import remote + repo_name = 'smtl' + repo = remote.REMOTE_REPOSITORIES.get(repo_name) + + # url for entry with id 1ake.1 + print(repo.URLForID('1ake.1')) + +where *repo_name* can be one of ['pdb', 'cif', 'pdb_redo', 'smtl']. +Instead of explicit access, you can directly fetch data using: + +.. autofunction:: ost.io.remote.RemoteGet + +.. autofunction:: ost.io.remote.RemoteLoad +.. autoclass:: ost.io.remote.RemoteRepository + :members: Saving Molecular Structures ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/modules/io/pymod/remote.py b/modules/io/pymod/remote.py index 42fee8bc76b9fe5a04e9ddac45f6f0fc071128cf..74617994f8a279d63348ced4596b4a48da8d87cb 100644 --- a/modules/io/pymod/remote.py +++ b/modules/io/pymod/remote.py @@ -26,6 +26,19 @@ class RemoteRepository: """ A remote repository represents a structural database accessible through the internet, e.g. the PDB or SWISS-MODEL template library. 
+ + :param name: Name of the repository + :param url_pattern: URL pattern for repository. Required format is described + in :func:`URLForID` + :param type: Data format to expect at resolved URL, must be in + ('pdb', 'cif') + :param id_transform: Transformation to apply to ID before resolving URL + in :func:`URLForID`. Must be in ('lower', 'upper') + + :type name: :class:`str` + :type url_pattern: :class:`str` + :type type: :class:`str` + :type id_transform: :class:`str` """ def __init__(self, name, url_pattern, type, id_transform='upper'): self.name = name @@ -36,6 +49,15 @@ class RemoteRepository: self.id_transform = id_transform def URLForID(self, id): + """ + Resolves URL given *url_pattern* and *id_transform* provided at object + initialization. + The *url_pattern* must contain substring '$ID'. Given *id*, the URL to + the structure gets constructed by applying *id_transform* and inserting it + at the location of '$ID'. E.g. 'https://files.rcsb.org/view/$ID.pdb' given + 1ake as *id* and 'upper' as *id_transform* resolves to: + 'https://files.rcsb.org/view/1AKE.pdb' + """ if self.id_transform == 'upper': id = id.upper() if self.id_transform == 'lower': @@ -43,6 +65,13 @@ class RemoteRepository: return self.url_pattern.replace('$ID', id) def Get(self, id): + """ + Resolves URL with :func:`URLForID`, dumps the content in a temporary file + and returns that file object (its path is available as the *name* attribute). + + :param id: ID to resolve + :type id: :class:`str` + """ remote_url = self.URLForID(id) tmp_file_suffix = '.%s' % self.type if remote_url.endswith('.gz'): @@ -65,6 +94,16 @@ class RemoteRepository: return tmp_file def Load(self, id): + """ + Resolves URL with :func:`URLForID` and directly loads/returns the corresponding + :class:`ost.mol.EntityHandle`. Loading invokes + :func:`ost.io.LoadPDB`/:func:`ost.io.LoadMMCIF` with default parameterization. If you need + custom settings, you might want to consider calling :func:`Get` and do the + loading manually. 
+ + :param id: ID to resolve + :type id: :class:`str` + """ tmp_file = self.Get(id) if self.type == 'pdb': return LoadPDB(tmp_file.name) @@ -83,12 +122,26 @@ REMOTE_REPOSITORIES = { } def RemoteGet(id, from_repo='pdb'): + """ + Invokes :func:`RemoteRepository.Get` on predefined repositories + ('pdb', 'smtl', 'cif', 'pdb_redo') + + :param from_repo: One of the predefined repositories + :type from_repo: :class:`str` + """ remote_repo = REMOTE_REPOSITORIES.get(from_repo, None) if not remote_repo: raise ValueError('%s is not a valid repository' % from_repo) return remote_repo.Get(id) def RemoteLoad(id, from_repo='pdb'): + """ + Invokes :func:`RemoteRepository.Load` on predefined repositories + ('pdb', 'smtl', 'cif', 'pdb_redo') + + :param from_repo: One of the predefined repositories + :type from_repo: :class:`str` + """ remote_repo = REMOTE_REPOSITORIES.get(from_repo, None) if not remote_repo: raise ValueError('%s is not a valid repository' % from_repo)