Assembly example

The following example shows how to generate an Assembly Hub, which is a way of supporting an assembly that is not already supported on UCSC.

It requires a .2bit file, which is created from a FASTA file using UCSC’s faToTwoBit tool. If you have bioconda set up, you can install faToTwoBit with `conda install ucsc-fatotwobit`; otherwise, download it from http://hgdownload.cse.ucsc.edu/admin/exe/.

This code is automatically run and the built trackhub is uploaded to the trackhub-demo repository. You can view the live hub using this link.

import trackhub
import re
import sys
import os
import glob

# Unlike the README example, this script does not call
# `trackhub.default_hub`. The hub is instead assembled by hand from its
# individual components, beginning with the Hub object itself.
hub_name = "assembly_hub"
hub = trackhub.Hub(
    hub_name,
    short_label=hub_name,
    long_label="an example of an assembly hub",
    email="none@example.com",
)

# What distinguishes an assembly hub from a regular track hub is the
# Assembly object built here; it must be added to the genomes_file object.
# The .2bit sequence file is read from trackhub's example data directory.
twobit_path = os.path.join(trackhub.helpers.data_dir(), "newOrg1.2bit")
genome = trackhub.Assembly(
    genome="newOrg1",
    twobit_file=twobit_path,
    organism="Big Foot",
    defaultPos="chr1:0-1000000",
    scientificName="Biggus Footus",
    description="BigFoot V4",
    html_string="BIGFOOT V4 INFO\n",
    orderKey=4800,
)

# Create an empty GenomesFile and attach it to the hub.
genomes_file = trackhub.GenomesFile()
hub.add_genomes_file(genomes_file)

# Create the TrackDb that will receive the tracks built further down, and
# attach it to the genome.
trackdb = trackhub.TrackDb()
genome.add_trackdb(trackdb)

# Add the genome (the Assembly created above) to the genomes file. This
# completes the chain hub -> genomes file -> genome -> trackDb.
genomes_file.add_genome(genome)

# Find all bigWigs for this genome in the example data directory and make a
# track for each one. glob returns matches in a filesystem-dependent order,
# so the paths are sorted to add the tracks in the same order on every run.
for bw in sorted(
        glob.glob(os.path.join(trackhub.helpers.data_dir(), "*no1*.bw"))):
    # The track name is the part of the filename before the first dot.
    # Indexing the first component (rather than unpacking a fixed number of
    # parts) avoids a ValueError if a matching file has more or fewer dots.
    name = os.path.basename(bw).split(".")[0]
    track = trackhub.Track(
        name=trackhub.helpers.sanitize(name),
        source=bw,
        tracktype='bigWig',
        autoScale='on')
    trackdb.add_tracks(track)

# Find all bigBeds for this genome in the example data directory and make a
# track for each one. glob returns matches in a filesystem-dependent order,
# so the paths are sorted to add the tracks in the same order on every run.
for bb in sorted(
        glob.glob(os.path.join(trackhub.helpers.data_dir(), "*no1*.bigBed"))):
    # The track name is the part of the filename before the first dot.
    # Indexing the first component (rather than unpacking a fixed number of
    # parts) avoids a ValueError if a matching file has more or fewer dots.
    name = os.path.basename(bb).split(".")[0]
    track = trackhub.Track(
        name=trackhub.helpers.sanitize(name),
        source=bb,
        tracktype='bigBed')
    trackdb.add_tracks(track)

# An assembly hub also needs a Group to be specified. Define one group,
# wrap it in a GroupsFile, and attach that file to the genome.
example_group = trackhub.groups.GroupDefinition(
    "example_tracks",
    label="Example Tracks",
    priority=1,
    default_is_closed=False,
)
group_definitions = [example_group]

groups_file = trackhub.groups.GroupsFile(group_definitions)
genome.add_groups(groups_file)

# Set the "group" parameter on every child of the trackDb so that each one
# refers to the "example_tracks" group.
for child_track in trackdb.children:
    child_track.add_params(group="example_tracks")

# Upload the assembled hub to the given directory on the target host.
trackhub.upload.upload_hub(
    hub=hub,
    host='localhost',
    remote_dir='example_hubs/example_assembly_hub',
)