# Source code for mwtab.cli

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
The mwtab command-line interface
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Usage:
    mwtab -h | --help
    mwtab --version
    mwtab convert (<from-path> <to-path>) [--from-format=<format>] [--to-format=<format>] [--validate] [--mw-rest=<url>] [--verbose]
    mwtab validate <from-path> [--mw-rest=<url>] [--verbose]
    mwtab download url <url> [--to-path=<path>] [--verbose]
    mwtab download study all [--to-path=<path>] [--input-item=<item>] [--output-format=<format>] [--mw-rest=<url>] [--validate] [--verbose]
    mwtab download study <input-value> [--to-path=<path>] [--input-item=<item>] [--output-item=<item>] [--output-format=<format>] [--mw-rest=<url>] [--validate] [--verbose]
    mwtab download (study | compound | refmet | gene | protein) <input-item> <input-value> <output-item> [--output-format=<format>] [--to-path=<path>] [--mw-rest=<url>] [--verbose]
    mwtab download moverz <input-item> <m/z-value> <ion-type-value> <m/z-tolerance-value> [--to-path=<path>] [--mw-rest=<url>] [--verbose]
    mwtab download exactmass <LIPID-abbreviation> <ion-type-value> [--to-path=<path>] [--mw-rest=<url>] [--verbose]
    mwtab extract metadata <from-path> <to-path> <key> ... [--to-format=<format>] [--no-header]
    mwtab extract metabolites <from-path> <to-path> (<key> <value>) ... [--to-format=<format>] [--no-header]

Options:
    -h, --help                      Show this screen.
    --version                       Show version.
    --verbose                       Print what files are being processed.
    --validate                      Validate the mwTab file.
    --from-format=<format>          Input file format, available formats: mwtab, json [default: mwtab].
    --to-format=<format>            Output file format [default: json].
                                    Available formats for convert:
                                        mwtab, json.
                                    Available formats for extract:
                                        json, csv.
    --mw-rest=<url>                 URL to MW REST interface
                                    [default: https://www.metabolomicsworkbench.org/rest/].
    --context=<context>             Type of resource to access from MW REST interface, available contexts: study,
                                    compound, refmet, gene, protein, moverz, exactmass [default: study].
    --input-item=<item>             Item to search Metabolomics Workbench with.
    --output-item=<item>            Item to be retrieved from Metabolomics Workbench.
    --output-format=<format>        Format for item to be retrieved in, available formats: mwtab, json.
    --no-header                     Do not include a header at the top of csv formatted files.

    For extraction <to-path> can take a "-" which will use stdout.
"""

from . import fileio, mwextract, mwrest
from .converter import Converter
from .validator import validate_file
from .mwschema import section_schema_mapping

from os import getcwd, makedirs, path
from os.path import join, isfile
from urllib.parse import quote_plus

import json
import re

# remove
import time
import datetime


# Translates a format name given on the command line into the string used in
# this module both as the REST "output_format" and as the saved file's
# extension. Both "txt" and "mwtab" resolve to "txt"; a missing format (None)
# resolves to None.
OUTPUT_FORMATS = {
    **dict.fromkeys(("txt", "mwtab"), "txt"),
    "json": "json",
    None: None,
}

# Module-level verbosity flag; off by default.
VERBOSE = False


def check_filepath(filepath):
    """Method for validating that the directory of a given path exists. If not, the directory is created.

    Nothing is done when ``filepath`` has no directory component (a bare file name) or when that
    component already exists on disk.

    :param str filepath: File path string.
    :return: None
    :rtype: :py:obj:`None`
    """
    dirname = path.dirname(filepath)
    # exist_ok=True closes the window between the existence check and the creation: if something
    # else creates the directory in between, makedirs() does not raise FileExistsError.
    if dirname and not path.exists(dirname):
        makedirs(dirname, exist_ok=True)


def get_file_path(dir_path, filename, extension):
    """Resolve the path a processed file should be written to.

    If ``dir_path`` is empty or ``None`` the current working directory is used instead. If the
    resulting path already carries a file extension it is treated as a complete file path and
    returned unchanged. Otherwise a file name is built from the URL-quoted ``filename`` (with any
    "." replaced by "_") plus ``extension`` and joined onto the directory.

    :param dir_path: Path to directory file is to be saved in.
    :type dir_path: :py:class:`str` or :py:class:`None`
    :param str filename: Filename processed file is to be saved as.
    :param str extension: File extension; "txt" is used when empty or ``None``.
    :return: Complete file path.
    :rtype: :py:class:`str`
    """
    target = dir_path or getcwd()

    # a path that already ends in an extension is taken to name the output file itself
    _, suffix = path.splitext(target)
    if suffix:
        return target

    safe_name = quote_plus(filename).replace(".", "_")
    return join(target, "{}.{}".format(safe_name, extension or "txt"))


def download(context, cmdparams):
    """Method for creating Metabolomics Workbench REST URLs and requesting files based on given commandline arguments.
    Retrieved data is then saved out as specified.

    Missing values fall back to defaults: "analysis_id" for ``<input-item>``, "mwtab" for ``<output-item>`` and
    "txt" for ``--output-format``. When ``--mw-rest`` is present in ``cmdparams`` it is forwarded as the REST base
    URL; otherwise the default of :class:`mwrest.GenericMWURL` applies.

    Any exception raised while building the URL, fetching or writing is printed rather than propagated, so that a
    caller looping over many identifiers can continue with the next one.

    :param str context: String indicating the type of data ("context") to be accessed from the Metabolomics Workbench.
    :param dict cmdparams: Commandline arguments specifying data to be accessed from Metabolomics Workbench.
    :return: None
    :rtype: :py:obj:`None`
    """
    try:
        # TODO: Convert to using mwrest.generate_study_urls() method
        # resolve the format once so the REST request and the saved file's extension always agree
        requested_format = cmdparams.get("--output-format")
        output_format = OUTPUT_FORMATS[requested_format] if requested_format else "txt"

        # create and validate a callable URL to pull data from Metabolomics Workbench's REST API
        rest_params = {
            "context": context,
            "input_item": cmdparams.get("<input-item>") or "analysis_id",
            "input_value": cmdparams["<input-value>"],
            "output_item": cmdparams.get("<output-item>") or "mwtab",
            "output_format": output_format,
        }
        # Honor the documented "--mw-rest" option.
        # NOTE(review): assumes GenericMWURL's second positional argument is the REST base URL, which is how cli()
        # in this module calls it - confirm against mwrest.
        base_url = cmdparams.get("--mw-rest")
        if base_url:
            mwresturl = mwrest.GenericMWURL(rest_params, base_url).url
        else:
            mwresturl = mwrest.GenericMWURL(rest_params).url
        mwrestfile = next(fileio.read_mwrest(mwresturl))

        if mwrestfile.text:  # if the text file isn't blank
            to_path = get_file_path(cmdparams.get("--to-path"), mwrestfile.source, output_format)
            with open(to_path, "w", encoding="utf-8") as fh:
                mwrestfile.write(fh)
        else:
            print("BLANK FILE")
    except Exception as e:
        # deliberate best-effort: report the failure and return so batch downloads keep going
        print(e)


def cli(cmdargs):
    """Implements the command line interface.

    Dispatches on the command flags in ``cmdargs`` (``convert``, ``validate``, ``download``, ``extract``) and
    propagates ``--verbose`` / ``--mw-rest`` to the ``fileio`` and ``mwrest`` modules before doing any work.

    :param dict cmdargs: dictionary of command line arguments.
    :return: None
    :rtype: :py:obj:`None`
    :raises ValueError: for ``download study all`` with an ``--input-item`` other than "analysis_id" or "study_id".
    """
    # without the global declaration the assignment would only bind a function-local name and the
    # module-level flag would never change
    global VERBOSE
    VERBOSE = cmdargs["--verbose"]
    fileio.VERBOSE = cmdargs["--verbose"]
    fileio.MWREST = cmdargs["--mw-rest"]
    mwrest.VERBOSE = cmdargs["--verbose"]

    # mwtab convert ...
    if cmdargs["convert"]:
        converter = Converter(from_path=cmdargs["<from-path>"],
                              to_path=cmdargs["<to-path>"],
                              from_format=cmdargs["--from-format"],
                              to_format=cmdargs["--to-format"],
                              validate=cmdargs["--validate"])
        converter.convert()

    # mwtab validate ...
    elif cmdargs["validate"]:
        for mwfile in fileio.read_files(cmdargs["<from-path>"], validate=cmdargs["--validate"]):
            validate_file(
                mwtabfile=mwfile,
                section_schema_mapping=section_schema_mapping,
                verbose=cmdargs.get("--verbose")
            )

    # mwtab download ...
    elif cmdargs["download"]:

        # mwtab download url ...
        if cmdargs["<url>"]:
            mwrestfile = next(fileio.read_mwrest(cmdargs["<url>"]))
            to_path = get_file_path(
                cmdargs["--to-path"],
                mwrestfile.source,
                OUTPUT_FORMATS[cmdargs.get("--output-format")]
            )
            with open(to_path, "w", encoding="utf-8") as fh:
                mwrestfile.write(fh)

        # mwtab download study ...
        elif cmdargs["study"]:

            # mwtab download study all ...
            if cmdargs["all"]:
                # mwtab download study all ...
                # mwtab download study all --input-item=analysis_id ...
                # mwtab download study all --input-item=study_id ...
                # TODO: mwtab download study all --input-item=project_id ...
                if not cmdargs["--input-item"] or cmdargs["--input-item"] in ("analysis_id", "study_id"):
                    # download() reads "<input-item>", so copy the option over
                    cmdargs["<input-item>"] = cmdargs["--input-item"]
                    id_list = list()
                    if not cmdargs["--input-item"] or cmdargs["--input-item"] == "analysis_id":
                        id_list = mwrest.analysis_ids()
                    elif cmdargs["--input-item"] == "study_id":
                        id_list = mwrest.study_ids()
                    for count, input_id in enumerate(id_list):
                        if VERBOSE:
                            print("[{:4}/{:4}]".format(count+1, len(id_list)), input_id, datetime.datetime.now())
                        cmdargs["<input-value>"] = input_id
                        download("study", cmdargs)
                        # pause between consecutive requests
                        time.sleep(3)
                else:
                    raise ValueError("Unknown \"--input-item\" {}".format(cmdargs["--input-item"]))

            # mwtab download study <input_value> ...
            elif cmdargs["<input-value>"] and not cmdargs["<input-item>"]:
                # <input-value> naming an existing file: the file holds a JSON list of ids to download
                if isfile(cmdargs["<input-value>"]):
                    with open(cmdargs["<input-value>"], "r") as fh:
                        id_list = json.load(fh)
                    if VERBOSE:
                        print("Found {} Files to be Downloaded".format(len(id_list)))
                    for count, input_id in enumerate(id_list):
                        if VERBOSE:
                            print("[{:4}/{:4}]".format(count + 1, len(id_list)), input_id, datetime.datetime.now())
                        cmdargs["<input-value>"] = input_id
                        download("study", cmdargs)
                        # pause between consecutive requests
                        time.sleep(3)

                # otherwise <input-value> is a single identifier
                else:
                    input_item = cmdargs.get("--input-item")
                    input_value = cmdargs["<input-value>"]
                    if not input_item:
                        # infer the kind of identifier from its shape
                        if input_value.isdigit():
                            # bare number: zero-pad to six digits and treat as an analysis id
                            input_value = "AN{}".format(input_value.zfill(6))
                            input_item = "analysis_id"
                        elif re.match(r'(AN[0-9]{6}$)', input_value):
                            input_item = "analysis_id"
                        elif re.match(r'(ST[0-9]{6}$)', input_value):
                            input_item = "study_id"

                    # "--output-format" has no default, so it may be None; previously that crashed while
                    # building the default file name ("." + None). Resolve it the way download() does
                    # ("mwtab" -> "txt", missing -> "txt"); an unrecognised value is passed through unchanged.
                    requested_format = cmdargs.get("--output-format")
                    output_format = OUTPUT_FORMATS.get(requested_format, requested_format) or "txt"

                    mwresturl = mwrest.GenericMWURL({
                        "context": "study",
                        "input_item": input_item,
                        "input_value": input_value,
                        "output_item": cmdargs.get("--output-item") or "mwtab",
                        "output_format": output_format,
                    }, cmdargs["--mw-rest"]).url
                    mwrestfile = next(fileio.read_mwrest(mwresturl))
                    default_path = join(getcwd(), quote_plus(mwrestfile.source).replace(".", "_") + "." + output_format)
                    with open(cmdargs["--to-path"] or default_path, "w", encoding="utf-8") as fh:
                        mwrestfile.write(fh)

            # mwtab download (study | ...) <input_item> ...
            elif cmdargs["<input-item>"]:
                download("study", cmdargs)

        # mwtab download (... compound | refmet | gene | protein) ...
        elif cmdargs["compound"]:
            download("compound", cmdargs)
        elif cmdargs["refmet"]:
            download("refmet", cmdargs)
        elif cmdargs["gene"]:
            download("gene", cmdargs)
        elif cmdargs["protein"]:
            download("protein", cmdargs)

        # mwtab download moverz <input-value> <m/z-value> <ion-type-value> <m/z-tolerance-value> [--verbose]
        elif cmdargs["moverz"]:
            # "--mw-rest" is forwarded here as in the single-study branch above
            mwresturl = mwrest.GenericMWURL({
                "context": "moverz",
                "input_item": cmdargs["<input-item>"],
                "m/z_value": cmdargs["<m/z-value>"],
                "ion_type_value": cmdargs["<ion-type-value>"],
                "m/z_tolerance_value": cmdargs["<m/z-tolerance-value>"],
            }, cmdargs["--mw-rest"]).url
            mwrestfile = next(fileio.read_mwrest(mwresturl))
            default_path = join(getcwd(), quote_plus(mwrestfile.source).replace(".", "_") + ".txt")
            # explicit encoding, matching every other writer in this module
            with open(cmdargs["--to-path"] or default_path, "w", encoding="utf-8") as fh:
                mwrestfile.write(fh)

        # mwtab download exactmass <LIPID-abbreviation> <ion-type-value> [--verbose]
        elif cmdargs["exactmass"]:
            mwresturl = mwrest.GenericMWURL({
                "context": "exactmass",
                "LIPID_abbreviation": cmdargs["<LIPID-abbreviation>"],
                "ion_type_value": cmdargs["<ion-type-value>"],
            }, cmdargs["--mw-rest"]).url
            mwrestfile = next(fileio.read_mwrest(mwresturl))
            default_path = join(getcwd(), quote_plus(mwrestfile.source).replace(".", "_") + ".txt")
            with open(cmdargs["--to-path"] or default_path, "w", encoding="utf-8") as fh:
                mwrestfile.write(fh)

    # mwtab extract ...
    elif cmdargs["extract"]:
        mwfile_generator = fileio.read_files(cmdargs["<from-path>"])

        # mwtab extract metabolites ...
        if cmdargs["metabolites"]:
            # a value written as r'...' is compiled as a regular expression (the leading r' and the
            # trailing quote are stripped); any other value is matched as a plain string
            key_value_pairs = [
                (key, re.compile(value[2:-1]) if value.startswith("r'") else value)
                for key, value in zip(cmdargs["<key>"], cmdargs["<value>"])
            ]
            metabolites_dict = mwextract.extract_metabolites(
                mwfile_generator,
                mwextract.generate_matchers(key_value_pairs)
            )

            if cmdargs["<to-path>"] != "-":
                if cmdargs["--to-format"] == "csv":
                    mwextract.write_metabolites_csv(cmdargs["<to-path>"], metabolites_dict, cmdargs["--no-header"])
                else:
                    mwextract.write_json(cmdargs["<to-path>"], metabolites_dict)
            else:
                print(json.dumps(metabolites_dict, indent=4, cls=mwextract.SetEncoder))

        # mwtab extract metadata ...
        elif cmdargs["metadata"]:
            # merge the values found for each key across all input files
            metadata = dict()
            for mwtabfile in mwfile_generator:
                extracted_values = mwextract.extract_metadata(mwtabfile, cmdargs["<key>"])
                for key, val in extracted_values.items():
                    metadata.setdefault(key, set()).update(val)

            if cmdargs["<to-path>"] != "-":
                if cmdargs["--to-format"] == "csv":
                    mwextract.write_metadata_csv(cmdargs["<to-path>"], metadata, cmdargs["--no-header"])
                else:
                    mwextract.write_json(cmdargs["<to-path>"], metadata)
            else:
                # emit JSON on stdout like the metabolites branch, instead of the Python repr of a
                # dict of sets
                print(json.dumps(metadata, indent=4, cls=mwextract.SetEncoder))