Source code for bioblend.galaxy.histories

"""
Contains possible interactions with the Galaxy Histories
"""
import os
import re
import shutil
import time
import urllib2
import urlparse

import bioblend
from bioblend.galaxy.client import Client
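
# Usage sketch (not part of the original module): a HistoryClient is normally
# reached through a connected GalaxyInstance rather than instantiated
# directly; the URL and API key below are placeholders:
#
#     from bioblend.galaxy import GalaxyInstance
#     gi = GalaxyInstance(url='https://usegalaxy.example.org', key='<api-key>')
#     hc = gi.histories  # a HistoryClient bound to this Galaxy server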


class HistoryClient(Client):

    def __init__(self, galaxy_instance):
        self.module = 'histories'
        super(HistoryClient, self).__init__(galaxy_instance)
    def create_history(self, name=None):
        """
        Create a new history, optionally setting the ``name``.
        """
        payload = {}
        if name is not None:
            payload['name'] = name
        return Client._post(self, payload)
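    # Example (a sketch; assumes ``hc`` is a HistoryClient as in the setup
    # sketch near the top, and the history name is arbitrary):
    #
    #     new_hist = hc.create_history(name='RNA-seq run 1')
    #     new_hist['id']  # encoded ID of the freshly created history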
    def get_histories(self, history_id=None, name=None, deleted=False):
        """
        Get all histories or filter the specific one(s) via the provided
        ``name`` or ``history_id``. Provide only one argument, ``name`` or
        ``history_id``, but not both.

        If ``deleted`` is set to ``True``, return histories that have been
        deleted.

        Return a list of history element dicts. If more than one history
        matches the given ``name``, return the list of all the histories with
        the given name.
        """
        if history_id is not None and name is not None:
            raise ValueError('Provide only one argument between name or history_id, but not both')
        histories = Client._get(self, deleted=deleted)
        if history_id is not None:
            history = next((_ for _ in histories if _['id'] == history_id), None)
            histories = [history] if history is not None else []
        elif name is not None:
            histories = [_ for _ in histories if _['name'] == name]
        return histories
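    # Example (a sketch; the history name is hypothetical). Filtering by name
    # may match several histories, so the result is always a list:
    #
    #     matches = hc.get_histories(name='RNA-seq run 1')
    #     for h in matches:
    #         h['id'], h['name']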
    def show_history(self, history_id, contents=False, deleted=None,
                     visible=None, details=None, types=None):
        """
        Get details of a given history. By default, just get the history meta
        information. If ``contents`` is set to ``True``, get the complete list
        of datasets in the given history. ``deleted``, ``visible``, and
        ``details`` are used only if ``contents`` is ``True`` and modify the
        datasets returned and their contents. Set ``details`` to ``'all'`` to
        get more information about each dataset.
        """
        params = {}
        if contents:
            if details:
                params['details'] = details
            if deleted is not None:
                params['deleted'] = deleted
            if visible is not None:
                params['visible'] = visible
            if types is not None:
                # The API expects a comma-separated string, so join the list
                params['types'] = ','.join(types)
        return Client._get(self, id=history_id, contents=contents, params=params)
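    # Example (a sketch; the encoded history ID is a placeholder): fetching
    # the dataset listing of a history, restricted to visible datasets, with
    # per-dataset details expanded. ``types`` is passed as a list and joined
    # into the comma-separated string the API expects:
    #
    #     datasets = hc.show_history('f792763bee8d277a', contents=True,
    #                                visible=True, details='all',
    #                                types=['dataset', 'dataset_collection'])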
    def delete_dataset(self, history_id, dataset_id):
        """
        Mark corresponding dataset as deleted.
        """
        url = self.gi._make_url(self, history_id, contents=True)
        # Append the dataset_id to the base history contents URL
        url = '/'.join([url, dataset_id])
        Client._delete(self, payload={}, url=url)
    def delete_dataset_collection(self, history_id, dataset_collection_id):
        """
        Mark corresponding dataset collection as deleted.
        """
        url = self.gi._make_url(self, history_id, contents=True)
        # Append the dataset_collection_id to the base history contents URL
        url = '/'.join([url, "dataset_collections", dataset_collection_id])
        Client._delete(self, payload={}, url=url)
    def show_dataset(self, history_id, dataset_id):
        """
        Get details about a given history dataset. The required ``history_id``
        can be obtained from the dataset's history content details.
        """
        url = self.gi._make_url(self, history_id, contents=True)
        # Append the dataset_id to the base history contents URL
        url = '/'.join([url, dataset_id])
        return Client._get(self, url=url)
    def show_dataset_collection(self, history_id, dataset_collection_id):
        """
        Get details about a given history dataset collection.
        """
        url = self.gi._make_url(self, history_id, contents=True)
        url = '/'.join([url, "dataset_collections", dataset_collection_id])
        return Client._get(self, url=url)
    def show_matching_datasets(self, history_id, name_filter=None):
        """
        Get dataset details for matching datasets within a history.

        Only datasets whose name matches the ``name_filter`` regular
        expression will be returned; use plain strings for exact matches and
        None to match all datasets in the history.
        """
        if isinstance(name_filter, basestring):
            # Anchor plain strings so they behave as exact-name matches
            name_filter = re.compile(name_filter + '$')
        return [self.show_dataset(history_id, h['id'])
                for h in self.show_history(history_id, contents=True)
                if name_filter is None or name_filter.match(h['name'])]
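    # Example (a sketch; ``hist_id`` is a placeholder encoded ID). Plain
    # strings are anchored with a trailing '$', so the first call matches the
    # exact name 'raw reads', while a precompiled pattern gives full
    # regular-expression control:
    #
    #     exact = hc.show_matching_datasets(hist_id, 'raw reads')
    #     fuzzy = hc.show_matching_datasets(hist_id, re.compile('raw.*'))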
    def show_dataset_provenance(self, history_id, dataset_id, follow=False):
        """
        Get details related to how dataset was created (``id``, ``job_id``,
        ``tool_id``, ``stdout``, ``stderr``, ``parameters``, ``inputs``,
        etc...).

        If ``follow`` is ``True``, recursively fetch dataset provenance
        information for all inputs and their inputs, etc...
        """
        # NOTE: ``follow`` is currently not forwarded to the API call below
        url = self.gi._make_url(self, history_id, contents=True)
        url = '/'.join([url, dataset_id, "provenance"])
        return Client._get(self, url=url)
    def update_history(self, history_id, name=None, annotation=None, **kwds):
        """
        Update history metadata information. Some of the attributes that can
        be modified are documented below.

        :type history_id: str
        :param history_id: Encoded history ID

        :type name: str
        :param name: Replace history name with the given string

        :type annotation: str
        :param annotation: Replace history annotation with given string

        :type deleted: bool
        :param deleted: Mark or unmark history as deleted

        :type published: bool
        :param published: Mark or unmark history as published

        :type importable: bool
        :param importable: Mark or unmark history as importable

        :type tags: list
        :param tags: Replace history tags with the given list

        :rtype: int
        :return: status code of the HTTP response
        """
        kwds['name'] = name
        kwds['annotation'] = annotation
        return Client._put(self, kwds, id=history_id).status_code
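    # Example (a sketch; ``hist_id`` is a placeholder encoded ID). Renaming a
    # history and replacing its tags in one call; a 200 status code signals
    # success:
    #
    #     status = hc.update_history(hist_id, name='QC-passed',
    #                                tags=['qc', 'paired-end'])
    #     assert status == 200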
    def update_dataset(self, history_id, dataset_id, **kwds):
        """
        Update history dataset metadata. Some of the attributes that can be
        modified are documented below.

        :type history_id: str
        :param history_id: Encoded history ID

        :type dataset_id: str
        :param dataset_id: Encoded dataset ID

        :type name: str
        :param name: Replace history dataset name with the given string

        :type annotation: str
        :param annotation: Replace history dataset annotation with given string

        :type deleted: bool
        :param deleted: Mark or unmark history dataset as deleted

        :type visible: bool
        :param visible: Mark or unmark history dataset as visible

        :rtype: int
        :return: status code of the HTTP response
        """
        url = self.gi._make_url(self, history_id, contents=True)
        # Append the dataset_id to the base history contents URL
        url = '/'.join([url, dataset_id])
        return Client._put(self, payload=kwds, url=url).status_code
    def update_dataset_collection(self, history_id, dataset_collection_id, **kwds):
        """
        Update history dataset collection metadata. Some of the attributes
        that can be modified are documented below.

        :type history_id: str
        :param history_id: Encoded history ID

        :type dataset_collection_id: str
        :param dataset_collection_id: Encoded dataset collection ID

        :type name: str
        :param name: Replace history dataset collection name with the given
          string

        :type deleted: bool
        :param deleted: Mark or unmark history dataset collection as deleted

        :type visible: bool
        :param visible: Mark or unmark history dataset collection as visible

        :rtype: int
        :return: status code of the HTTP response
        """
        url = self.gi._make_url(self, history_id, contents=True)
        url = '/'.join([url, "dataset_collections", dataset_collection_id])
        return Client._put(self, payload=kwds, url=url).status_code
    def create_history_tag(self, history_id, tag):
        """
        Create history tag

        :type history_id: str
        :param history_id: Encoded history ID

        :type tag: str
        :param tag: Add tag to history

        :rtype: dict
        :return: A dict with information regarding the tag.
          For example::

            {'id': 'f792763bee8d277a',
             'model_class': 'HistoryTagAssociation',
             'user_tname': 'NGS_PE_RUN',
             'user_value': None}
        """
        # empty payload since we are adding the new tag using the url
        payload = {}
        # creating the url
        url = self.url
        url = '/'.join([url, history_id, 'tags', tag])
        return Client._post(self, payload, url=url)
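    # Example (a sketch; ``hist_id`` is a placeholder): tagging a history and
    # reading the stored tag name back from the returned dict:
    #
    #     tag_info = hc.create_history_tag(hist_id, 'NGS_PE_RUN')
    #     tag_info['user_tname']  # 'NGS_PE_RUN'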
    def upload_dataset_from_library(self, history_id, lib_dataset_id):
        """
        Upload a dataset into the history from a library. Requires the
        library dataset ID, which can be obtained from the library contents.
        """
        payload = {
            'content': lib_dataset_id,
            'source': 'library',
            'from_ld_id': lib_dataset_id,  # compatibility with old API
        }
        return Client._post(self, payload, id=history_id, contents=True)
    def create_dataset_collection(self, history_id, collection_description):
        """
        Create a new dataset collection in the given history.

        ``collection_description`` may be a dict or any object exposing a
        ``to_dict()`` method; it must provide ``name``, ``collection_type``
        and ``element_identifiers`` entries.
        """
        try:
            # Accept description objects that know how to serialize themselves
            collection_description = collection_description.to_dict()
        except AttributeError:
            pass
        payload = dict(
            name=collection_description["name"],
            type="dataset_collection",
            collection_type=collection_description["collection_type"],
            element_identifiers=collection_description["element_identifiers"],
        )
        return Client._post(self, payload, id=history_id, contents=True)
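    # Example (a sketch): the description may be a plain dict; the element
    # identifiers below reference hypothetical dataset IDs already present in
    # the history:
    #
    #     collection = hc.create_dataset_collection(hist_id, {
    #         'name': 'read pairs',
    #         'collection_type': 'list',
    #         'element_identifiers': [
    #             {'id': 'edbb045496bd6bad', 'name': 'sample1', 'src': 'hda'},
    #             {'id': 'adbb045496bd6bad', 'name': 'sample2', 'src': 'hda'},
    #         ],
    #     })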
    def download_dataset(self, history_id, dataset_id, file_path,
                         use_default_filename=True, to_ext=None):
        """
        Download a ``dataset_id`` from history with ``history_id`` to a file
        on the local file system, saving it to ``file_path``.
        """
        # TODO: Outsource to DatasetClient.download_dataset() to replace most of this.
        meta = self.show_dataset(history_id, dataset_id)
        d_type = to_ext
        if d_type is None and 'file_ext' in meta:
            d_type = meta['file_ext']
        elif d_type is None and 'data_type' in meta:
            d_type = meta['data_type']
        # TODO: Download this via the REST API: api/datasets/<dataset_id>/display
        download_url = 'datasets/' + meta['id'] + '/display?to_ext=' + d_type
        url = urlparse.urljoin(self.gi.base_url, download_url)
        req = urllib2.urlopen(url)
        if use_default_filename:
            file_local_path = os.path.join(file_path, meta['name'])
        else:
            file_local_path = file_path
        with open(file_local_path, 'wb') as fp:
            shutil.copyfileobj(req, fp)
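    # Example (a sketch; IDs and paths are placeholders): with
    # ``use_default_filename=True`` the file is written inside the given
    # directory under the dataset's own name; otherwise ``file_path`` names
    # the target file itself:
    #
    #     hc.download_dataset(hist_id, dataset_id, '/tmp/exports')
    #     hc.download_dataset(hist_id, dataset_id, '/tmp/reads.fastq',
    #                         use_default_filename=False)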
    def delete_history(self, history_id, purge=False):
        """
        Delete a history.

        If ``purge`` is set to ``True``, also purge the history.

        .. note::
          For the purge option to work, the Galaxy instance must have the
          ``allow_user_dataset_purge`` option set to ``True`` in the
          ``config/galaxy.ini`` configuration file.
        """
        payload = {}
        if purge is True:
            payload['purge'] = purge
        return Client._delete(self, payload, id=history_id)
    def undelete_history(self, history_id):
        """
        Undelete a history
        """
        url = self.gi._make_url(self, history_id, deleted=True)
        # Append the 'undelete' action to the history URL
        url = '/'.join([url, 'undelete'])
        return Client._post(self, payload={}, url=url)
    def get_status(self, history_id):
        """
        Returns the state of this history as a dictionary with the following
        keys:

        'state' = This is the current state of the history, such as ok,
          error, new etc.
        'state_details' = Contains individual statistics for various dataset
          states.
        'percent_complete' = The overall percentage of datasets processed to
          completion.
        """
        state = {}
        history = self.show_history(history_id)
        state['state'] = history['state']
        if history.get('state_details') is not None:
            state['state_details'] = history['state_details']
            total_complete = sum(history['state_details'].itervalues())
            if total_complete > 0:
                state['percent_complete'] = 100 * history['state_details']['ok'] / total_complete
            else:
                state['percent_complete'] = 0
        return state
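    # Example (a sketch): a simple polling loop built on get_status; the
    # 10-second interval is arbitrary:
    #
    #     status = hc.get_status(hist_id)
    #     while status['state'] not in ('ok', 'error'):
    #         time.sleep(10)
    #         status = hc.get_status(hist_id)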
    def get_current_history(self):
        """
        Deprecated method.

        Just an alias for get_most_recently_used_history().
        """
        return self.get_most_recently_used_history()
    def get_most_recently_used_history(self):
        """
        Returns the current user's most recently used history (not deleted).
        """
        url = self.gi._make_url(self, None)
        url = '/'.join([url, 'most_recently_used'])
        return Client._get(self, url=url)
    def export_history(self, history_id, gzip=True, include_hidden=False,
                       include_deleted=False, wait=False):
        """
        Start a job to create an export archive for the given history.

        :type history_id: str
        :param history_id: history ID

        :type gzip: bool
        :param gzip: create .tar.gz archive if :obj:`True`, else .tar

        :type include_hidden: bool
        :param include_hidden: whether to include hidden datasets in the
          export

        :type include_deleted: bool
        :param include_deleted: whether to include deleted datasets in the
          export

        :type wait: bool
        :param wait: if :obj:`True`, block until the export is ready; else,
          return immediately

        :rtype: str
        :return: ``jeha_id`` of the export, or empty if ``wait`` is
          :obj:`False` and the export is not ready.
        """
        params = {
            'gzip': gzip,
            'include_hidden': include_hidden,
            'include_deleted': include_deleted,
        }
        url = '%s/exports' % self.gi._make_url(self, history_id)
        while True:
            r = Client._put(self, {}, url=url, params=params)
            if not wait or r.status_code == 200:
                break
            time.sleep(1)
        contents = r.json()
        if contents:
            jeha_id = contents['download_url'].rsplit('/', 1)[-1]
        else:
            jeha_id = ''  # export is not ready
        return jeha_id
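    # Example (a sketch; ``hist_id`` is a placeholder): with ``wait=True``
    # the call blocks until the archive has been built, so the returned
    # jeha_id is immediately usable with download_history() below:
    #
    #     jeha_id = hc.export_history(hist_id, gzip=True, wait=True)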
    def download_history(self, history_id, jeha_id, outf,
                         chunk_size=bioblend.CHUNK_SIZE):
        """
        Download a history export archive. Use :meth:`export_history` to
        create an export.

        :type history_id: str
        :param history_id: history ID

        :type jeha_id: str
        :param jeha_id: jeha ID (this should be obtained via
          :meth:`export_history`)

        :type outf: file
        :param outf: output file object, open for writing

        :type chunk_size: int
        :param chunk_size: how many bytes at a time should be read into memory
        """
        url = '%s/exports/%s' % (
            self.gi._make_url(self, module_id=history_id), jeha_id)
        r = self.gi.make_get_request(url, stream=True)
        r.raise_for_status()
        for chunk in r.iter_content(chunk_size):
            outf.write(chunk)
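    # Example (a sketch): downloading the export created in the sketch above;
    # the local path is a placeholder:
    #
    #     with open('/tmp/history_backup.tar.gz', 'wb') as outf:
    #         hc.download_history(hist_id, jeha_id, outf)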