Downloading Files: FASTA and FASTQΒΆ
The example below is a CLI script to download FASTA and FASTQ files from two plugins output with given report (Results) ID. One of plugin output has a non-deterministic file output name.
~$ python get_fasta_and_fastq.py -h
usage: get_plugin_fasta_fastq_data.py [-h] [--host [HOST]]
[--username [USERNAME]]
[--password [PASSWORD]]
[--resultPK [RESULTPK]]
Get the FASTA and FASTQ from generateConsensus and FileExporter for all the
barcodes for the requested Result id
optional arguments:
-h, --help show this help message and exit
--host [HOST] target host to download files. (default: None)
--username [USERNAME]
TB account username. (default: ionadmin)
--password [PASSWORD]
TB account password. (default: ionadmin)
--resultPK [RESULTPK]
the result primary key or ID. Case insensitive.
# Copyright (C) 2021 Thermo Fisher Scientific. All Rights Reserved.
import os
import sys
import json
import argparse
import requests
from bs4 import BeautifulSoup
help_text = """
Requirement:
one of these options must be used: '--host', '--resultPK', .
Logic:
- Downloads all Sample Consensus Sequences FASTA from the Generate Consensus
- Downloads compressed Zip which contains all FASTQ from FileExporter
- Make sure requests, bs4(BeautifulSoup4) modules are installed via pip
"""
class GetPluginResults:
api_name = "results"
deleted_count = 0
ignored_names = []
def __init__(self, inputArgs):
self.host = inputArgs["host"]
self.auth = (inputArgs["username"], inputArgs["password"])
self.resultPK = inputArgs["resultPK"]
def get_objects(self, url):
try:
out = requests.get(url, auth=self.auth)
if out.ok:
return out.json()
else:
print(
">>>> (Status Code: %d) Unable to retrieve %s"
% (out.status_code, out.url)
)
except requests.ConnectionError:
print(">>>> Unable to connect %s" % (url))
def getFastaFastq(self):
if self.resultPK:
url = os.path.join(self.host, "rundb/api/v1", self.api_name, self.resultPK)
for obj in self.get_objects(url).get("pluginresults"):
pluginUrl = str(self.host + obj)
pluginOut = self.get_objects(pluginUrl)
if pluginOut.get("pluginName") == "generateConsensus":
self.download_fasta(pluginOut)
if pluginOut.get("pluginName") == "FileExporter":
self.download_fastq(pluginOut)
def getStartPluginJson(self, pluginOut):
startPluginUrl = self.host + pluginOut.get("URL") + "startplugin.json"
req = requests.get(startPluginUrl, auth=self.auth)
return req.json()
def download_fasta(self, pluginOut):
print("Starting FASTA download...")
startPluginUrl = self.host + pluginOut.get("URL") + "startplugin.json"
req = requests.get(startPluginUrl, auth=self.auth)
data = req.json()
allConsenusFastaIn = (
data.get("expmeta").get("output_file_name_stem") + ".consensus.fasta"
)
allConsenusFastaOut = (
data.get("expmeta").get("output_file_name_stem")
+ "_"
+ str(pluginOut.get("id"))
+ ".consensus.fasta"
)
try:
file_url = self.host + pluginOut.get("URL") + allConsenusFastaIn
req = requests.get(file_url, auth=self.auth)
with open(allConsenusFastaOut, "wb") as f:
f.write(req.content)
print(allConsenusFastaIn)
except Exception as Err:
print("FASTQ download failed. Please check %s" % Err)
print("Completed FASTA download")
def download_fastq(self, pluginOut):
print("Starting the FASTQ download...")
resultDirPath = (
self.getStartPluginJson(pluginOut)
.get("runinfo")
.get("results_dir")
.split("/")
)
metal_url = os.path.join(
self.host,
"report",
str(self.resultPK),
"metal",
resultDirPath[-2],
resultDirPath[-1],
)
req = requests.get(metal_url, auth=self.auth)
soup = BeautifulSoup(req.content, features="html.parser")
rows = soup.find("table").find_all("tr")
fastq_zip = None
for row in rows:
try:
fileName = row.find("a").get_text()
if "zip" in fileName:
fastq_zip = fileName
exit()
except Exception:
continue
if fastq_zip:
zipUrlIn = self.host + pluginOut.get("URL") + fastq_zip
zipUrlOut = str(pluginOut.get("id")) + "_" + fastq_zip
response = requests.get(
zipUrlIn, stream=True, auth=("ionadmin", "ionadmin")
)
with open(zipUrlOut, "wb") as zip:
for chunk in response.iter_content(chunk_size=512):
if chunk: # filter out keep-alive new chunks
zip.write(chunk)
print(fastq_zip)
else:
print("FASTQ download did not complete")
exit()
print("Completed FASTA download")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog="get_plugin_fasta_fastq_data.py",
description="Get the FASTA and FASTQ from generateConsensus and FileExporter "
"for all the barcodes for the requested "
"Result id",
)
parser.add_argument(
"--host",
nargs="?",
help="target host to download files. (default: %(default)s)",
)
parser.add_argument(
"--username",
nargs="?",
default="ionadmin",
help="TB account username. (default: %(default)s)",
)
parser.add_argument(
"--password",
nargs="?",
default="ionadmin",
help="TB account password. (default: %(default)s)",
)
parser.add_argument(
"--resultPK",
nargs="?",
help="the result primary key or ID. " + "Case insensitive.",
)
args = vars(parser.parse_args())
if args.get("host") and args.get("resultPK"):
if "http" not in args.get("host"):
print("need to specifiy HTTP or HTTPS")
sys.exit(1)
pluginResultData = GetPluginResults(args)
pluginResultData.getFastaFastq()
else:
print("Error: one of the required options is not used")
print(help_text)
parser.parse_args(["-h"])
sys.exit(1)