Source code for metaseq.minibrowser

"""
Module for spawning mini genome browsers using a plugin structure, making it
possible to build rather complex mini-browsers.  The goal is to point the
mini-browser to some data, and call its plot() method with a feature.  This
will spawn a new figure showing the data for that interval.

MiniBrowser classes are just a general way of mapping data-manipulation or
data-visualization methods to an Axes on which the data should be displayed.

To make a new subclass:

1. Create one or more methods that accept an Axes object and a pybedtools
   Interval object and return a feature.  The simplest do-nothing method would
   be::

        def my_panel(self, ax, feature)
            return feature

   A more useful method might be one that plots genomic signal over the
   region::

        def my_panel(self, ax, feature):
            # for simplicity, assume just use the first genomic_signal
            gs = self.genomic_signal_objs[0]
            x, y = gs.local_coverage(feature, bins=100)
            ax.plot(x, y, **kwargs)
            ax.axis('tight')
            return feature

2. Then, override the `panels()` method.  This method:

    * Creates Axes as needed; assumes that self.make_fig() has already been
      called so that self.fig is available.
    * Returns a list of (ax, method) tuples.  This list maps created Axes to
      methods that should operate on them (like `my_panel` method above).

    For example::

        def panels(self):
            ax = self.fig.add_subplot(111)
            return [(ax, self.my_panel)]


A figure is spawned by calling the `plot` method on a pybedtools genomic
interval, e.g.::

    s = SignalMiniBrowser(ip, control])
    s.plot(feature)

"""
from copy import copy
from matplotlib import pyplot as plt
from pybedtools.contrib.plotting import Track
import pybedtools
import gffutils
from gffutils.helpers import asinterval
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.gridspec as gridspec
from matplotlib.ticker import FormatStrFormatter
import _genomic_signal


[docs]class BaseMiniBrowser(object): """ Base class for plotting a genomic region. This class is designed to be sub-classed, so it really just a shell with some example methods filled in. """ _all_figures = []
[docs] def __init__(self, genomic_signal_objs): self.genomic_signal_objs = genomic_signal_objs
[docs] def close_all(self): """ Close all figures spawned by this class. """ for f in self._all_figures: plt.close(f)
[docs] def plot(self, feature): """ Spawns a new figure showing data for `feature`. :param feature: A `pybedtools.Interval` object Using the pybedtools.Interval `feature`, creates figure specified in :meth:`BaseMiniBrowser.make_fig` and plots data on panels according to `self.panels()`. """ if isinstance(feature, gffutils.Feature): feature = asinterval(feature) self.make_fig() axes = [] for ax, method in self.panels(): feature = method(ax, feature) axes.append(ax) return axes
[docs] def make_fig(self): """ Figure constructor, called before `self.plot()` """ self.fig = plt.figure(figsize=(8, 4)) self._all_figures.append(self.fig)
[docs] def panels(self): """ Method to be overriden by subclasses. This method should create Axes objects on self.fig and register which methods operate on which axes. Each method's signature should be func(self, ax, feature), and it will take a pybedtools.Interval `feature` and plot it or something derived from it onto `ax`. It should return a pybedtools.Interval object, usually just the same one that was passed in. """ ax = self.fig.add_subplot(111) return [(ax, self.example_panel)]
[docs] def example_panel(self, ax, feature): """ A example panel that just prints the text of the feature. """ txt = '%s:%s-%s' % (feature.chrom, feature.start, feature.stop) ax.text(0.5, 0.5, txt, transform=ax.transAxes) return feature
class ChIPSeqMiniBrowser(BaseMiniBrowser): def __init__(self, ip, control, db=None, peaks=None, local_coverage_kwargs=dict(stranded=False), ip_style=None, control_style=None, peaks_style=None, max_bins=500): """ Mini-browser to show IP and control signal, along with optional peaks and gene models. See module-level documentation for details. Parameters ---------- ip, control : genomic_signal object These can be any kind of genomic signal object. If BamSignal, then the signal will be scaled by million mapped reads. db : filename or gffutils.FeatureDB Optional database of annotations. If provided, gene models will be plotted on an additional axes. peaks : filename or pybedtools.BedTool object Optional file of called peaks. If provided, will be plotted on an additional axes. local_coverage_kwargs : dict Kwargs to pass on to the local_coverage method of `ip` and `control`. It is recommended that at least `stranded=False` is included, since the mini-browser always displays the plus strand. ip_style, control_style : dict Kwargs to pass to Axes.fill_between (e.g. colors, alpha, line styles) peaks_style : dict Kwargs to pass to PolyCollection (e.g., face_color, edgewidth) max_bins : int Maximum number of bins to use when getting genomic_signal. This can be overridden by providing the `bins` kwarg in `local_coverage_kwargs`. Notes ----- After creation, settings can be changed in between calls to the `plot()` method. For example, the self.settings dictionary, which contains arguments passed to the gffutils.contrib.plotting.Gene class, can be modified to specify which kinds of features should be plotted. The `gs` attribute contains the matplotlib.gridspec.GridSpec object, which can be used to modify the relative sizes of the subplots. """ super(ChIPSeqMiniBrowser, self).__init__([ip, control]) self.local_coverage_kwargs = local_coverage_kwargs or {} self.ip = ip self.control = control if isinstance(db, basestring): db = gffutils.FeatureDB(db) self.db = db self.ip_style = ip_style or {} self.peaks_style = peaks_style or {} self.control_style = control_style or {} if peaks is not None: peaks = pybedtools.BedTool(peaks) self.peaks = peaks self.max_bins = max_bins self.settings = { 'transcripts': None, 'cds': ['CDS'], 'utrs': ['exon'], 'color': '0.5', 'featuretype': 'gene', } def panels(self): self.fig.set_facecolor('w') method_dispatch = { 'ip': self.ip_panel, 'control': self.control_panel, 'peaks': self.peaks_panel, 'genes': self.genes_panel} if self.db and self.peaks: gs = gridspec.GridSpec(4, 1, height_ratios=[1, 1, .15, .5]) ip_ax = plt.subplot(gs[0]) control_ax = plt.subplot(gs[1], sharex=ip_ax, sharey=ip_ax) peaks_ax = plt.subplot(gs[2], sharex=ip_ax) gene_ax = plt.subplot(gs[3], sharex=ip_ax) axes = {'ip': ip_ax, 'control': control_ax, 'peaks': peaks_ax, 'genes': gene_ax, } elif self.db and self.peaks is None: gs = gridspec.GridSpec(3, 1, height_ratios=[1, 1, .5]) ip_ax = plt.subplot(gs[0]) control_ax = plt.subplot(gs[1], sharex=ip_ax, sharey=ip_ax) gene_ax = plt.subplot(gs[2], sharex=ip_ax) axes = {'ip': ip_ax, 'control': control_ax, 'genes': gene_ax, } elif self.db is None and self.peaks: gs = gridspec.GridSpec(3, 1, height_ratios=[1, 1, .3]) ip_ax = plt.subplot(gs[0]) control_ax = plt.subplot(gs[1], sharex=ip_ax, sharey=ip_ax) peaks_ax = plt.subplot(gs[2], sharex=ip_ax) axes = {'ip': ip_ax, 'control': control_ax, 'peaks': peaks_ax, } elif self.db is None and self.peaks is None: gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1]) ip_ax = plt.subplot(gs[0]) control_ax = plt.subplot(gs[1], sharex=ip_ax, sharey=ip_ax) axes = {'ip': ip_ax, 'control': control_ax, } self.axes = axes self.gridspec = gs # Order shouldn't matter, but just in case... mapping = [] for kind in ['ip', 'control', 'peaks', 'genes']: if kind in axes: mapping.append((axes[kind], method_dispatch[kind])) self._first_ax = mapping[0][0] self._last_ax = mapping[-1][0] return mapping def _bins(self, feature): return min(len(feature), self.max_bins) def _zoomed_feature(self, feature): extra = int(self.settings['zoom'] * len(feature) / 2) feature = copy(feature) feature.start -= extra feature.stop += extra return feature def ip_panel(self, ax, feature): bins = self._bins(feature) x, y = self.ip.local_coverage( feature, bins=bins, **self.local_coverage_kwargs) if isinstance(self.ip, _genomic_signal.BamSignal): y /= (self.ip.mapped_read_count() / 1e6) ax.fill_between(x, y, y2=0, **self.ip_style) ax.axis('tight') ax.spines['right'].set_color('None') ax.spines['top'].set_color('None') ax.xaxis.set_ticks_position('bottom') ax.yaxis.set_ticks_position('left') return feature def control_panel(self, ax, feature): bins = self._bins(feature) x, y = self.control.local_coverage( feature, bins=bins, **self.local_coverage_kwargs) if isinstance(self.control, _genomic_signal.BamSignal): y /= (self.control.mapped_read_count() / 1e6) ax.fill_between(x, y, y2=0, **self.control_style) ax.axis('tight') ax.spines['right'].set_color('None') ax.spines['top'].set_color('None') ax.xaxis.set_ticks_position('bottom') ax.yaxis.set_ticks_position('left') return feature def peaks_panel(self, ax, feature): hits = self.peaks.intersect([feature], u=True) track = Track(hits, **self.peaks_style) ax.add_collection(track) for x in ( ax.get_yticklines() + ax.get_xticklines() + ax.get_yticklabels() + ax.get_xticklabels() ): x.set_visible(False) ax.set_ylabel('Peaks', rotation=0, horizontalalignment='right', verticalalignment='center') ax.yaxis.set_ticks_position('none') ax.xaxis.set_ticks_position('none') ax.set_frame_on(False) return feature def plot(self, feature): self._current_feature = feature axes = super(ChIPSeqMiniBrowser, self).plot(feature) for ax in self.fig.axes: if ax is not self._last_ax: for txt in ax.get_xticklabels(): txt.set_visible(False) self.fig.subplots_adjust(wspace=0.05) self._first_ax.axis(xmin=feature.start, xmax=feature.stop) self._last_ax.set_xlabel(feature.chrom) self._last_ax.xaxis.set_major_formatter(FormatStrFormatter("%d")) return axes def coords(self): chrom = self._current_feature.chrom start, stop, _, _ = self._first_ax.axis() return '%s:%s-%s' % (chrom, int(start), int(stop)) def genes_panel(self, ax, feature): """ Plots gene models on an Axes. Queries the database :param ax: matplotlib.Axes object :param feature: pybedtools.Interval """ from gffutils.contrib.plotting import Gene extent = [feature.start, feature.stop] nearby_genes = self.db.region( (feature.chrom, feature.start, feature.stop), featuretype='gene') ybase = 0 ngenes = 0 for nearby_gene in nearby_genes: # TODO: there should be a better way of identifying which gene is # the same as the feature requested. Might need to expose an "ID" # kwarg. try: if nearby_gene['ID'][0] == feature['ID']: color = '0.2' else: color = '0.5' except KeyError: color = '0.5' ngenes += 1 extent.extend([nearby_gene.start, nearby_gene.stop]) gene_collection = Gene( self.db, nearby_gene, transcripts=None, cds=['CDS'], utrs=['exon'], ybase=ybase, color=color) gene_collection.name = nearby_gene.id gene_collection.add_to_ax(ax) ybase += gene_collection.max_y xmin = min(extent) xmax = max(extent) ymax = ngenes # 1% padding seems to work well padding = (xmax - xmin) * 0.01 ax.axis('tight') # add lines indicating extent of current feature # vline_kwargs = dict(color='k', linestyle='--') # ax.axvline(feature.start, **vline_kwargs) # ax.axvline(feature.stop, **vline_kwargs) # Make a new feature to represent the region plus surrounding genes interval = pybedtools.create_interval_from_list(feature.fields) interval.strand = '.' for txt in ax.get_yticklabels(): txt.set_visible(False) for tick in ax.get_yticklines(): tick.set_visible(False) ax.set_ylabel('Genes') ax.spines['right'].set_color('None') ax.spines['left'].set_color('None') ax.spines['top'].set_color('None') ax.yaxis.set_ticks_position('none') ax.xaxis.set_ticks_position('bottom') ax.set_ylabel('Genes', rotation=0, horizontalalignment='right', verticalalignment='center') return interval
[docs]class SignalMiniBrowser(BaseMiniBrowser):
[docs] def __init__(self, genomic_signal_objs, local_coverage_kwargs=None, plotting_kwargs=None): """ Base class for plotting genomic signal. Plots genomic signal over a particular area of the genome. Designed to be extended by subclasses, but can stand alone if all you want is a simple one-panel browser. :param genomic_signal_objs: list of genomic signal objects (e.g., :class:`metaseq.genomic_signal.BamSignal` instances). :param local_coverage_kwargs: a dictionary of kwargs to send to each genomic signals' `local_coverage()` method, e.g.:: local_coverage_kwargs = dict(fragment_size=200). :param plotting_kwargs: a list of dictionaries, one for each genomic signals object, e.g,:: plotting_kwargs = [dict(color='r', label='IP', dict(color='k', label='input')] """ super(SignalMiniBrowser, self).__init__(genomic_signal_objs) self.plotting_kwargs = \ plotting_kwargs or [{} for i in genomic_signal_objs] self.local_coverage_kwargs = local_coverage_kwargs or {}
[docs] def panels(self): """ Add a single panel to the figure """ ax = self.fig.add_subplot(111) return [(ax, self.signal_panel)]
[docs] def signal_panel(self, ax, feature): """ Plots each genomic signal as a line using the corresponding plotting_kwargs """ for gs, kwargs in zip(self.genomic_signal_objs, self.plotting_kwargs): x, y = gs.local_coverage(feature, **self.local_coverage_kwargs) ax.plot(x, y, **kwargs) ax.axis('tight') return feature
[docs]class GeneModelMiniBrowser(SignalMiniBrowser):
[docs] def __init__(self, genomic_signal_objs, db, **kwargs): """ Mini-browser to show a signal panel on top and gene models on the bottom. :param genomic_signal_objs: a list of genomic_signals objects :param db: a `gffutils.FeatureDB` """ super(GeneModelMiniBrowser, self).__init__( genomic_signal_objs, **kwargs) self.db = db
[docs] def panels(self): """ Add 2 panels to the figure, top for signal and bottom for gene models """ ax1 = self.fig.add_subplot(211) ax2 = self.fig.add_subplot(212, sharex=ax1) return (ax2, self.gene_panel), (ax1, self.signal_panel)
[docs] def gene_panel(self, ax, feature): """ Plots gene models on an Axes. Queries the database :param ax: matplotlib.Axes object :param feature: pybedtools.Interval """ from gffutils.contrib.plotting import Gene extent = [feature.start, feature.stop] nearby_genes = self.db.region( (feature.chrom, feature.start, feature.stop), featuretype='gene') ybase = 0 ngenes = 0 for nearby_gene in nearby_genes: ngenes += 1 extent.extend([nearby_gene.start, nearby_gene.stop]) gene_collection = Gene( self.db, nearby_gene, transcripts=['mRNA'], cds=['CDS'], utrs=['exon'], ybase=ybase, color="0.5", picker=5) gene_collection.name = nearby_gene.id gene_collection.add_to_ax(ax) ybase += gene_collection.max_y xmin = min(extent) xmax = max(extent) ymax = ngenes # 1% padding seems to work well padding = (xmax - xmin) * 0.01 ax.axis('tight') # add lines indicating extent of current feature vline_kwargs = dict(color='k', linestyle='--') ax.axvline(feature.start, **vline_kwargs) ax.axvline(feature.stop, **vline_kwargs) # Make a new feature to represent the region plus surrounding genes interval = pybedtools.create_interval_from_list(feature.fields) interval.start = xmin - padding interval.stop = xmax + padding interval.strand = '.' return interval
class PeakMiniBrowser(SignalMiniBrowser): def __init__(self, genomic_signal_objs, bed, **kwargs): """ Signal on the top panel, peaks in the bottom panel. `bed` is a filename or BedTool object (or list of these things) that will be used to make a pybedtools.contrib.plotting.Track. If genomic_signal_objs is None, then only show the peaks axes """ super(PeakMiniBrowser, self).__init__(genomic_signal_objs, **kwargs) self.bed = bed def panels(self): ax1 = self.fig.add_subplot(211) ax2 = self.fig.add_subplot(212, sharex=ax1) return (ax1, self.signal_panel), (ax2, self.peak_panel) def peak_panel(self, ax, feature): bedtool = pybedtools.BedTool(self.bed) features = bedtool.intersect([feature], u=True) track = Track(features) ax.add_collection(track) # ax.axis('tight') return feature if __name__ == "__main__": import metaseq import gffutils import pybedtools G = gffutils.FeatureDB( metaseq.example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db')) ip = metaseq.genomic_signal( metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'), 'bam') inp = metaseq.genomic_signal( metaseq.example_filename('wgEncodeUwTfbsK562InputStdAlnRep1.bam'), 'bam') peaks = pybedtools.BedTool(metaseq.example_filename( 'wgEncodeUwTfbsK562CtcfStdPkRep1.narrowPeak.gz')) plotting_kwargs = [ dict(color='r', label='IP'), dict(color='k', linestyle=':', label='input')] local_coverage_kwargs = dict(fragment_size=200) b = SignalMiniBrowser( [ip, inp], plotting_kwargs=plotting_kwargs, local_coverage_kwargs=local_coverage_kwargs) g = GeneModelMiniBrowser( [ip, inp], G, plotting_kwargs=plotting_kwargs, local_coverage_kwargs=local_coverage_kwargs) p = PeakMiniBrowser( [ip, inp], peaks, plotting_kwargs=plotting_kwargs, local_coverage_kwargs=local_coverage_kwargs) feature = peaks[3] b.plot(feature) g.plot(feature) p.plot(feature) plt.show()