#!/usr/bin/python
# Copyright (C) 2012 Ion Torrent Systems, Inc. All Rights Reserved
# FastqCreator plugin
import os
import glob
import json
import traceback
import pysam
import subprocess
from ion.utils import blockprocessing
from ion.plugin import *
# DNA base complements
COMPLEMENT = {'A': 'T',
'T': 'A',
'C': 'G',
'G': 'C',
'N': 'N'}
def reverse_complement(sequence):
return ''.join(COMPLEMENT[b] for b in sequence[::-1])
class FastqCreator(IonPlugin):
version = "3.6.0-r%s" % filter(str.isdigit,"$Revision: 56860 $")
runtypes = [ RunType.COMPOSITE, RunType.THUMB, RunType.FULLCHIP ]
def bam2fastq(self, bam_filename_list, fastq_filename):
try:
with open(fastq_filename, 'w') as fastq_file:
for bam_file in bam_filename_list:
if os.path.exists(bam_file):
try:
samfile = pysam.Samfile(bam_file, mode="rb",check_header=False,check_sq=False)
for x in samfile.fetch(until_eof=True):
if x.is_reverse:
qual = x.qual[::-1]
seq = reverse_complement(x.seq)
else:
qual = x.qual
seq = x.seq
fastq_file.write("@%s\n%s\n+\n%s\n" % (x.qname,seq,qual))
samfile.close()
except:
traceback.print_exc()
except:
traceback.print_exc()
def bam2fastq_picard(self, bam_filename_list, fastq_filename):
try:
com = blockprocessing.bam2fastq_command(bam_filename_list[0],fastq_filename)
ret = subprocess.call(com,shell=True)
except:
traceback.print_exc()
def launch(self, data=None):
with open('startplugin.json', 'r') as fh:
spj = json.load(fh)
net_location = spj['runinfo']['net_location']
basecaller_dir = spj['runinfo']['basecaller_dir']
alignment_dir = spj['runinfo']['alignment_dir']
barcodeId = spj['runinfo']['barcodeId']
runtype = spj['runplugin']['run_type']
url_path = os.getenv('TSP_URLPATH_PLUGIN_DIR','./')
print url_path
output_stem = os.getenv('TSP_FILEPATH_OUTPUT_STEM','unknown')
print "TSP_FILEPATH_OUTPUT_STEM: %s" % output_stem
reference_path = os.getenv('TSP_FILEPATH_GENOME_FASTA','')
with open(os.path.join(basecaller_dir, "datasets_basecaller.json"),'r') as f:
datasets_basecaller = json.load(f);
for dataset in datasets_basecaller['datasets']:
print dataset
# input
bam_list = []
if reference_path != '':
# don't use aligned bam files (TS-6279) or reverse-complemented it
bam = os.path.join(alignment_dir, dataset['file_prefix']+'.bam')
else:
bam = os.path.join(basecaller_dir, dataset['file_prefix']+'.basecaller.bam')
print bam
if os.path.exists(bam):
bam_list.append(bam)
# output
if barcodeId:
dataset['fastq'] = dataset['file_prefix'].rstrip('_rawlib')+'_'+output_stem+'.fastq'
else:
dataset['fastq'] = output_stem+'.fastq'
if len(bam_list) == 0:
print 'WARNING: missing input file(s) for %s' % dataset['fastq']
continue
try:
#use pysam only for unmapped bam files
#self.bam2fastq_pysam(bam_list,dataset['fastq'])
self.bam2fastq_picard(bam_list,dataset['fastq'])
except:
traceback.print_exc()
with open('FastqCreator_block.html','w') as f:
f.write('<html><body>To download: "Right Click" -> "Save Link As..."<br>\n')
for fastq_file in glob.glob('*.fastq'):
size = os.path.getsize(fastq_file)/1000
f.write('<a href="%s">%s</a> %sK<br>\n' % (os.path.join(net_location, url_path, fastq_file), fastq_file, size))
f.write('</body></html>\n')
return True
def report(self):
output = {
'sections': {
'title': 'FastqCreator',
'type': 'html',
'content': '<p>FastqCreator util</p>',
},
}
return output
def metrics(self):
""" Write result.json metrics """
return { 'blocks': 96 }
# dev use only - makes testing easier
if __name__ == "__main__": PluginCLI(FastqCreator())