Source code for mwtab.converter

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
mwtab.converter
~~~~~~~~~~~~~~~

This module provides functionality for converting between the
Metabolomics Workbench ``mwTab`` formatted file and its equivalent
JSONized representation.

The following conversions are possible:

Local files:
   * One-to-one file conversions:
      * textfile - to - textfile
      * textfile - to - textfile.gz
      * textfile - to - textfile.bz2
      * textfile.gz - to - textfile
      * textfile.gz - to - textfile.gz
      * textfile.gz - to - textfile.bz2
      * textfile.bz2 - to - textfile
      * textfile.bz2 - to - textfile.gz
      * textfile.bz2 - to - textfile.bz2
      * textfile / textfile.gz / textfile.bz2 - to - textfile.zip / textfile.tar / textfile.tar.gz / textfile.tar.bz2 (TypeError: One-to-many conversion)
   * Many-to-many file conversions:
      * Directories:
         * directory - to - directory
         * directory - to - directory.zip
         * directory - to - directory.tar
         * directory - to - directory.tar.bz2
         * directory - to - directory.tar.gz
         * directory - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
      * Zipfiles:
         * zipfile.zip - to - directory
         * zipfile.zip - to - zipfile.zip
         * zipfile.zip - to - tarfile.tar
         * zipfile.zip - to - tarfile.tar.gz
         * zipfile.zip - to - tarfile.tar.bz2
         * zipfile.zip - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
      * Tarfiles:
         * tarfile.tar - to - directory
         * tarfile.tar - to - zipfile.zip
         * tarfile.tar - to - tarfile.tar
         * tarfile.tar - to - tarfile.tar.gz
         * tarfile.tar - to - tarfile.tar.bz2
         * tarfile.tar - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
         * tarfile.tar.gz - to - directory
         * tarfile.tar.gz - to - zipfile.zip
         * tarfile.tar.gz - to - tarfile.tar
         * tarfile.tar.gz - to - tarfile.tar.gz
         * tarfile.tar.gz - to - tarfile.tar.bz2
         * tarfile.tar.gz - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
         * tarfile.tar.bz2 - to - directory
         * tarfile.tar.bz2 - to - zipfile.zip
         * tarfile.tar.bz2 - to - tarfile.tar
         * tarfile.tar.bz2 - to - tarfile.tar.gz
         * tarfile.tar.bz2 - to - tarfile.tar.bz2
         * tarfile.tar.bz2 - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
URL files:
   * One-to-one file conversions:
      * analysis_id - to - textfile
      * analysis_id - to - textfile.gz
      * analysis_id - to - textfile.bz2
      * analysis_id - to - textfile.zip / textfile.tar / textfile.tar.gz / textfile.tar.bz2 (TypeError: One-to-many conversion)
      * textfileurl - to - textfile
      * textfileurl - to - textfile.gz
      * textfileurl - to - textfile.bz2
      * textfileurl.gz - to - textfile
      * textfileurl.gz - to - textfile.gz
      * textfileurl.gz - to - textfile.bz2
      * textfileurl.bz2 - to - textfile
      * textfileurl.bz2 - to - textfile.gz
      * textfileurl.bz2 - to - textfile.bz2
      * textfileurl / textfileurl.gz / textfileurl.bz2 - to - textfile.zip / textfile.tar / textfile.tar.gz / textfile.tar.bz2 (TypeError: One-to-many conversion)
   * Many-to-many file conversions:
      * Zipfiles:
         * zipfileurl.zip - to - directory
         * zipfileurl.zip - to - zipfile.zip
         * zipfileurl.zip - to - tarfile.tar
         * zipfileurl.zip - to - tarfile.tar.gz
         * zipfileurl.zip - to - tarfile.tar.bz2
         * zipfileurl.zip - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
      * Tarfiles:
         * tarfileurl.tar - to - directory
         * tarfileurl.tar - to - zipfile.zip
         * tarfileurl.tar - to - tarfile.tar
         * tarfileurl.tar - to - tarfile.tar.gz
         * tarfileurl.tar - to - tarfile.tar.bz2
         * tarfileurl.tar - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
         * tarfileurl.tar.gz - to - directory
         * tarfileurl.tar.gz - to - zipfile.zip
         * tarfileurl.tar.gz - to - tarfile.tar
         * tarfileurl.tar.gz - to - tarfile.tar.gz
         * tarfileurl.tar.gz - to - tarfile.tar.bz2
         * tarfileurl.tar.gz - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
         * tarfileurl.tar.bz2 - to - directory
         * tarfileurl.tar.bz2 - to - zipfile.zip
         * tarfileurl.tar.bz2 - to - tarfile.tar
         * tarfileurl.tar.bz2 - to - tarfile.tar.gz
         * tarfileurl.tar.bz2 - to - tarfile.tar.bz2
         * tarfileurl.tar.bz2 - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
"""

import os
import io
import zipfile
import tarfile
import bz2
import gzip

from . import fileio


class Translator(object):
    """Abstract base class holding the source/destination settings of a conversion.

    Subclasses are expected to implement :meth:`__iter__`.
    """

    def __init__(self, from_path, to_path, from_format=None, to_format=None, validate=False):
        """Translator initializer.

        :param str from_path: Path to input file(s).
        :param str to_path: Path to output file(s).
        :param str from_format: Input format.
        :param str to_format: Output format.
        :param bool validate: Stored unchanged on the instance as ``validate``.
        """
        self.from_path, self.to_path = from_path, to_path
        self.from_format, self.to_format = from_format, to_format

        # Compression markers are whatever ``GenericFilePath.is_compressed``
        # reports for each path.
        detect_compression = fileio.GenericFilePath.is_compressed
        self.from_path_compression = detect_compression(from_path)
        self.to_path_compression = detect_compression(to_path)

        self.validate = validate

    def __iter__(self):
        """Abstract iterator; always raises until overridden in a subclass.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError()
class MWTabFileToMWTabFile(Translator):
    """Concrete translator used to convert between ``mwTab`` and ``JSON`` formats."""

    # Output file-name suffix for each supported target format.
    file_extension = {"json": ".json", "mwtab": ".txt"}

    def __init__(self, from_path, to_path, from_format=None, to_format=None, validate=False):
        """MWTabFileToMWTabFile translator initializer.

        :param str from_path: Path to input file(s).
        :param str to_path: Path to output file(s).
        :param str from_format: Input format: `mwtab` or `json`.
        :param str to_format: Output format: `mwtab` or `json`.
        :param bool validate: Whether to validate or not.
        """
        super().__init__(
            from_path,
            to_path,
            from_format=from_format,
            to_format=to_format,
            validate=validate,
        )

    def __iter__(self):
        """Yield every item produced by ``fileio.read_files`` for ``from_path``.

        :return: instance of :class:`~mwtab.mwtab.MWTabFile` object instance.
        :rtype: :class:`~mwtab.mwtab.MWTabFile`
        """
        yield from fileio.read_files(self.from_path, validate=self.validate)
class Converter(object):
    """Convert ``mwTab`` files from ``mwTab`` to ``JSON`` or from ``JSON`` to ``mwTab`` format."""

    def __init__(self, from_path, to_path, from_format="mwtab", to_format="json", validate=False):
        """Converter initializer.

        :param str from_path: Path to input file(s).
        :param str to_path: Path to output file(s).
        :param str from_format: Input format: `mwtab` or `json`.
        :param str to_format: Output format: `mwtab` or `json`.
        :param bool validate: Whether to validate or not.
        """
        self.file_generator = MWTabFileToMWTabFile(
            from_path, to_path, from_format, to_format, validate
        )

    def convert(self):
        """Run the conversion, picking one-to-one or many-to-many from the input.

        The parent directory of the output path is created first when it is
        missing. Directories and multi-file archives are handled as
        many-to-many; plain or single-file-compressed files, URLs and
        all-digit identifiers are handled as one-to-one.

        :return: None
        :rtype: :py:obj:`None`
        :raises TypeError: If the input is none of the recognised kinds.
        """
        gen = self.file_generator

        parent_dir = os.path.dirname(gen.to_path)
        if parent_dir and not os.path.exists(parent_dir):
            os.makedirs(parent_dir)

        source = gen.from_path

        if os.path.isdir(source):
            self._many_to_many()
            return

        if os.path.isfile(source) or fileio.GenericFilePath.is_url(source):
            compression = gen.from_path_compression
            if compression in ("zip", "tar", "tar.gz", "tar.bz2"):
                self._many_to_many()
            elif compression in ("gz", "bz2") or not compression:
                self._one_to_one()
            return

        if source.isdigit():
            self._one_to_one()
            return

        raise TypeError('Unknown input file format: "{}"'.format(source))

    def _many_to_many(self):
        """Perform many-to-many files conversion.

        :return: None
        :rtype: :py:obj:`None`
        :raises TypeError: If the output path denotes a single compressed file
            or an unrecognised compression.
        """
        gen = self.file_generator
        target = gen.to_path_compression

        if not target:
            self._to_dir(gen)
            return
        if target == "zip":
            self._to_zipfile(gen)
            return
        if target in ("tar", "tar.gz", "tar.bz2"):
            self._to_tarfile(gen)
            return
        if target in ("gz", "bz2"):
            raise TypeError(
                'Many-to-one conversion, cannot convert "{}" into "{}"'.format(
                    gen.from_path, gen.to_path
                )
            )
        raise TypeError('Unknown output file format: "{}"'.format(gen.to_path))

    def _one_to_one(self):
        """Perform one-to-one file conversion.

        :return: None
        :rtype: :py:obj:`None`
        :raises TypeError: If the output path denotes a multi-file archive or
            an unrecognised compression.
        """
        gen = self.file_generator
        target = gen.to_path_compression

        if not target:
            self._to_textfile(gen)
            return
        if target == "gz":
            self._to_gzipfile(gen)
            return
        if target == "bz2":
            self._to_bz2file(gen)
            return
        if target in ("tar", "tar.gz", "tar.bz2", "zip"):
            raise TypeError(
                'One-to-many conversion, cannot convert "{}" into "{}"'.format(
                    gen.from_path, gen.to_path
                )
            )
        raise TypeError('Unknown format: "{}"'.format(gen.to_path))

    def _to_dir(self, file_generator):
        """Write each yielded file into a directory tree under the output path.

        :param file_generator: Iterable translator; each yielded item must
            provide ``source`` and ``write(filehandle, file_format)``.
        :return: None
        :rtype: :py:obj:`None`
        """
        for item in file_generator:
            destination = self._output_path(item.source, file_generator.to_format)
            destination_dir = os.path.dirname(destination)
            if not os.path.exists(destination_dir):
                os.makedirs(destination_dir)
            with open(destination, mode="w") as handle:
                item.write(handle, file_generator.to_format)

    def _to_zipfile(self, file_generator):
        """Write each yielded file as a member of a deflate-compressed zip archive.

        :param file_generator: Iterable translator; each yielded item must
            provide ``source`` and ``writestr(file_format)``.
        :return: None
        :rtype: :py:obj:`None`
        """
        archive = zipfile.ZipFile(
            file_generator.to_path, mode="w", compression=zipfile.ZIP_DEFLATED
        )
        with archive:
            for item in file_generator:
                member_name = self._output_path(
                    item.source, file_generator.to_format, archive=True
                )
                archive.writestr(member_name, item.writestr(file_generator.to_format))

    def _to_tarfile(self, file_generator):
        """Write each yielded file as a member of a tar archive.

        The tar write mode follows the output compression; anything other
        than ``tar.gz`` / ``tar.bz2`` results in an uncompressed archive.

        :param file_generator: Iterable translator; each yielded item must
            provide ``source`` and ``writestr(file_format)``.
        :return: None
        :rtype: :py:obj:`None`
        """
        modes = {"tar": "w", "tar.gz": "w:gz", "tar.bz2": "w:bz2"}
        tar_mode = modes.get(file_generator.to_path_compression, "w")

        with tarfile.open(file_generator.to_path, mode=tar_mode) as archive:
            for item in file_generator:
                member_name = self._output_path(
                    item.source, file_generator.to_format, archive=True
                )
                member = tarfile.TarInfo(member_name)
                payload = item.writestr(file_generator.to_format).encode()
                # The size must be set explicitly; addfile() reads exactly
                # ``member.size`` bytes from the file object.
                member.size = len(payload)
                archive.addfile(tarinfo=member, fileobj=io.BytesIO(payload))

    def _to_bz2file(self, file_generator):
        """Write the yielded file(s) into a single bz2-compressed file.

        :param file_generator: Iterable translator; each yielded item must
            provide ``writestr(file_format)``.
        :return: None
        :rtype: :py:obj:`None`
        """
        with bz2.BZ2File(file_generator.to_path, mode="wb") as compressed:
            for item in file_generator:
                text = item.writestr(file_generator.to_format)
                compressed.write(text.encode())

    def _to_gzipfile(self, file_generator):
        """Write the yielded file(s) into a single gzip-compressed file.

        :param file_generator: Iterable translator; each yielded item must
            provide ``writestr(file_format)``.
        :return: None
        :rtype: :py:obj:`None`
        """
        with gzip.GzipFile(file_generator.to_path, mode="wb") as compressed:
            for item in file_generator:
                text = item.writestr(file_generator.to_format)
                compressed.write(text.encode())

    def _to_textfile(self, file_generator):
        """Write the yielded file(s) into a regular text file.

        The extension registered for the output format is appended to the
        output path unless the path already ends with it.

        :param file_generator: Iterable translator; each yielded item must
            provide ``writestr(file_format)``.
        :return: None
        :rtype: :py:obj:`None`
        """
        extension = file_generator.file_extension[file_generator.to_format]
        to_path = file_generator.to_path
        if not to_path.endswith(extension):
            to_path = to_path + extension

        with open(to_path, mode="w") as handle:
            for item in file_generator:
                handle.write(item.writestr(file_generator.to_format))

    def _output_path(self, input_path, to_format, archive=False):
        """Construct an output path string from an input path string.

        The directory components of ``input_path`` that lie beyond the prefix
        it shares with the converter's input path are kept and the output
        format's extension is appended to the file name.

        :param str input_path: Input path string.
        :param str to_format: Key into the translator's ``file_extension`` map.
        :param bool archive: If true, return a path relative to the archive
            root; otherwise prefix it with the converter's output path.
        :return: Output path string.
        :rtype: :py:class:`str`
        """
        gen = self.file_generator
        source_dir, fname = os.path.split(os.path.abspath(os.path.normpath(input_path)))

        # NOTE(review): os.path.commonprefix compares character by character,
        # not path component by path component.
        shared = os.path.commonprefix(
            [os.path.abspath(gen.from_path), os.path.abspath(source_dir)]
        )
        shared_count = len(shared.split(os.sep))
        tail = source_dir.split(os.sep)[shared_count:]

        if archive:
            out_dir = os.path.join(*tail) if tail else ""
        else:
            out_dir = os.path.join(gen.to_path, *tail)

        return os.path.join(out_dir, fname + gen.file_extension[to_format])