Source code for gffutils.biopython_integration

"""
Module for integration with BioPython, specifically SeqRecords and SeqFeature
objects.
"""

try:
    from Bio.SeqFeature import SeqFeature, FeatureLocation
except ImportError:
    import warnings

    warnings.warn("BioPython must be installed to use this module")
from .feature import Feature, feature_from_line

# Lookup table: GFF strand symbol -> strand value passed to BioPython.
_biopython_strand = {"+": 1, "-": -1, ".": None, "?": 0}

# Reverse lookup: BioPython strand value -> GFF strand symbol.
_feature_strand = {
    value: symbol for symbol, value in _biopython_strand.items()
}


def to_seqfeature(feature):
    """
    Build a Bio.SeqFeature object from a gffutils.Feature object.

    The GFF fields `source`, `score`, `seqid`, and `frame` are each stored as
    a one-item list under the qualifier of the same name. The GFF
    `attributes` are then merged into the same qualifiers dictionary.

    Parameters
    ----------
    feature : Feature object, or string
        If string, it is assumed to be a GFF or GTF-format line and is parsed
        with `feature_from_line`; otherwise the provided feature is used
        directly.
    """
    if isinstance(feature, str):
        feature = feature_from_line(feature)

    gff_fields = ("source", "score", "seqid", "frame")
    qualifiers = {name: [getattr(feature, name)] for name in gff_fields}
    qualifiers.update(feature.attributes)

    # GFF coordinates are 1-based, while BioPython uses standard Python
    # 0-based indexing, so the start is shifted down by one and the stop is
    # passed through as-is.
    location = FeatureLocation(
        feature.start - 1,
        feature.stop,
        strand=_biopython_strand[feature.strand],
    )
    return SeqFeature(
        location,
        id=feature.id,
        type=feature.featuretype,
        qualifiers=qualifiers,
    )


def _first_qualifier_value(qualifiers, key, default="."):
    """Return the first value stored under `key`, or `default` if there is none."""
    value = qualifiers.get(key, default)
    if isinstance(value, str):
        # A bare string is a single value; indexing it would keep only its
        # first character (e.g. "ensembl" -> "e").
        return value or default
    try:
        return value[0]
    except IndexError:
        # Qualifier is present but holds an empty sequence.
        return default
    except TypeError:
        # Non-indexable scalar (e.g. a numeric score): use it as the value.
        return value


def from_seqfeature(s, **kwargs):
    """
    Converts a Bio.SeqFeature object to a gffutils.Feature object.

    The GFF fields `source`, `score`, `seqid`, and `frame` are assumed to be
    stored as qualifiers; each falls back to "." when the qualifier is
    missing or empty. A qualifier may hold either a sequence of values (the
    first one is used) or a single plain string. Any other qualifiers will be
    assumed to be GFF attributes.

    Parameters
    ----------
    s : Bio.SeqFeature object
        The feature to convert. Its `location`, `type`, `id` and
        `qualifiers` are read.
    **kwargs
        Passed through unchanged to the Feature constructor.

    Raises
    ------
    KeyError
        If `s.location.strand` is not one of the values in the module's
        strand lookup table.
    """
    gff_fields = ("source", "score", "seqid", "frame")
    source, score, seqid, frame = (
        _first_qualifier_value(s.qualifiers, name) for name in gff_fields
    )
    strand = _feature_strand[s.location.strand]

    # BioPython parses 1-based GenBank positions into 0-based for use within
    # Python. We need to convert back to 1-based GFF format here.
    start = s.location.start + 1
    stop = s.location.end

    # Everything that is not one of the dedicated GFF fields becomes a GFF
    # attribute. Work on a copy so the SeqFeature's own qualifiers are left
    # untouched.
    attributes = dict(s.qualifiers)
    for name in gff_fields:
        attributes.pop(name, None)

    return Feature(
        seqid,
        source,
        s.type,
        start,
        stop,
        score,
        strand,
        frame,
        attributes,
        id=s.id,
        **kwargs
    )