Skip to content
Snippets Groups Projects
Commit 186cad7b authored by Mate Balajti's avatar Mate Balajti
Browse files

feat: add cli

parent b4cc85c3
No related branches found
No related tags found
1 merge request!6feat: add cli, setup.py
Pipeline #17212 passed
# Transcript Sampler # Transcript Sampler
## Overview
This workflow samples representative transcripts per gene, in proportion to their relative abundance levels. Sampling is done by Poisson sampling. This workflow samples representative transcripts per gene, in proportion to their relative abundance levels. Sampling is done by Poisson sampling.
**This workflow takes as input:** This workflow takes as input:
- Path to genome annotation file in gtf format - Path to genome annotation file in gtf format
- Integer of number of transcripts to sample - Path to csv or tsv file with transcript IDs and expression levels
- Path to csv or tsv file with transcript IDs and expression levels - Path to output sample gtf file
- Path to output sample gtf file - Path to output sample transcript IDs and counts
- Path to output sample transcript IDs and counts - Integer of number of transcripts to sample
**The outputs are:** The outputs are:
- transcript sample gtf file - transcript sample gtf file
- csv file containing sample transcript IDs and counts. - csv file containing sample transcript IDs and counts.
**The workflow can be run via the command line as** ## Installation from github
Transcript sampler requires Python 3.9 or later.
`python transcript_sampler/new_exe.py --input_gtf={gtf input file} --input_csv={input csv file} --output_gtf={output gtf file} --output_csv={output csv file} --n_to_sample={number of transcripts}`
Install Transcript sampler from Github using:
```
git clone https://git.scicore.unibas.ch/zavolan_group/tools/transcript-sampler.git
cd transcript-sampler
pip install .
```
## Usage
```
usage: transcript-sampler [-h] --input_gtf INPUT_GTF --input_csv INPUT_CSV --output_gtf OUTPUT_GTF --output_csv OUTPUT_CSV --n_to_sample N_TO_SAMPLE
Transcript sampler
options:
-h, --help show this help message and exit
--input_gtf INPUT_GTF
GTF file with genome annotation (default: None)
--input_csv INPUT_CSV
CSV or TSV file with transcripts and their expression level (default: None)
--output_gtf OUTPUT_GTF
Output path for the new GTF file of representative transcripts (default: None)
--output_csv OUTPUT_CSV
Output path for the new CSV file of representative transcripts and their sampled number (default: None)
--n_to_sample N_TO_SAMPLE
Total number of transcripts to sample (default: None)
```
Example : Example :
`python transcript_sampler/new_exe.py --input_gtf="input_files/test.gtf" --input_csv="input_files/expression.csv" --output_gtf="output_files/output.gtf" --output_csv="output_files/output.csv" --n_to_sample=100` `transcript-sampler --input_gtf="tests/input_files/test.gtf" --input_csv="tests/input_files/expression.csv" --output_gtf="output_files/output.gtf" --output_csv="output_files/output.csv" --n_to_sample=100`
argparse argparse
biopython biopython
gtfparse gtfparse
polars == 0.16.17
numpy >= 1.23.3 numpy >= 1.23.3
pandas >= 1.4.4 pandas >= 1.4.4
\ No newline at end of file
...@@ -18,5 +18,8 @@ setup( ...@@ -18,5 +18,8 @@ setup(
author_email='mate.balajti@unibas.ch', author_email='mate.balajti@unibas.ch',
description='Transcript sampler', description='Transcript sampler',
packages=find_packages(), packages=find_packages(),
install_requires=INSTALL_REQUIRES install_requires=INSTALL_REQUIRES,
entry_points={
'console_scripts': ['transcript-sampler=transcript_sampler.cli:main']
}
) )
...@@ -2,92 +2,81 @@ ...@@ -2,92 +2,81 @@
import argparse import argparse
import time import time
import logging import logging
from find_reptrans import FindRepTrans
from match_reptrans_explvl import MatchReptransExplvl
from poisson_sampling import SampleTranscript
find_rep_trans = FindRepTrans()
match_reptrs_explvl = MatchReptransExplvl()
poisson_sample = SampleTranscript()
logging.basicConfig( logging.basicConfig(
format='[%(asctime)s: %(levelname)s] %(message)s \ format='[%(asctime)s: %(levelname)s] %(message)s \
(module "%(module)s")', (module "%(module)s")',
level=logging.INFO, level=logging.INFO,
) )
LOG = logging.getLogger("main")
def main(args: argparse.Namespace):
"""Execute transcript sampler."""
start = time.time()
LOG.info("Started transcript sampler...")
dict_repr_trans = find_rep_trans.get_rep_trans(args.input_gtf)
df_repr = match_reptrs_explvl.match_repr_transcript_expression_level(
dict_reprTrans=dict_repr_trans,
exprTrans=args.input_csv,
gtf_file=args.input_gtf
)
LOG.info(
"Finding match between representative transcripts \
and expression level file"
)
LOG.info("Poisson sampling of transcripts")
poisson_sample.transcript_sampling(
args.transcript_nr, df_repr, args.output_csv)
LOG.info("Output CSV file ready")
LOG.info("Writing output GTF file")
find_rep_trans.gtf_file_writer(
args.input_gtf, dict_repr_trans, args.output_gtf)
end = time.time() from transcript_sampler.find_reptrans import FindRepTrans # noqa: E402,E501 # pylint:disable=wrong-import-position
LOG.info("Script executed in %s sec", (end - start)) from transcript_sampler.match_reptrans_explvl import MatchReptransExplvl # noqa: E402,E501 # pylint:disable=wrong-import-position
from transcript_sampler.poisson_sampling import SampleTranscript # noqa: E402,E501 # pylint:disable=wrong-import-position
find_rep_trans = FindRepTrans()
match_reptrs_explvl = MatchReptransExplvl()
poisson_sample = SampleTranscript()
def parse_arguments() -> argparse.Namespace:
"""Request parameters from user on CLI.
Returns: def main():
argparse.Namespace: object of arguments from CLI. """Execute transcript sampler."""
"""
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Transcript sampler", description="Transcript sampler",
formatter_class=argparse.ArgumentDefaultsHelpFormatter, formatter_class=argparse.ArgumentDefaultsHelpFormatter,
) )
parser.add_argument( parser.add_argument(
"--input_gtf", required=True, "--input_gtf", required=True, default=None,
help="GTF file with genome annotation" help="GTF file with genome annotation"
) )
parser.add_argument( parser.add_argument(
"--input_csv", required=True, "--input_csv", required=True, default=None,
help="CSV or TSV file with transcripts and their expression level" help="CSV or TSV file with transcripts and their expression level"
) )
parser.add_argument( parser.add_argument(
"--output_gtf", required=True, "--output_gtf", required=True, default=None,
help="Output path for the new GTF file of representative transcripts" help="Output path for the new GTF file of representative transcripts"
) )
parser.add_argument( parser.add_argument(
"--output_csv", required=True, "--output_csv", required=True, default=None,
help="Output path for the new CSV file of representative transcripts \ help="Output path for the new CSV file of representative transcripts \
and their sampled number" and their sampled number"
) )
parser.add_argument( parser.add_argument(
"--n_to_sample", required=True, "--n_to_sample", required=True, default=None,
help="Total number of transcripts to sample" help="Total number of transcripts to sample"
) )
args = parser.parse_args() args = parser.parse_args()
return args log = logging.getLogger("main")
start = time.time()
log.info("Started transcript sampler...")
dict_repr_trans = find_rep_trans.get_rep_trans(args.input_gtf)
df_repr = match_reptrs_explvl.match_repr_transcript_expression_level(
dict_reprTrans=dict_repr_trans,
exprTrans=args.input_csv,
gtf_file=args.input_gtf
)
log.info(
"Finding match between representative transcripts \
and expression level file"
)
log.info("Poisson sampling of transcripts")
poisson_sample.transcript_sampling(
args.n_to_sample, df_repr, args.output_csv)
log.info("Output CSV file ready")
log.info("Writing output GTF file")
find_rep_trans.gtf_file_writer(
args.input_gtf, dict_repr_trans, args.output_gtf)
if __name__ == '__main__': end = time.time()
log.info("Script executed in %s sec", (end - start))
if __name__ == "__main__":
logging.basicConfig( logging.basicConfig(
format='[%(asctime)s: %(levelname)s] %(message)s \ format='[%(asctime)s: %(levelname)s] %(message)s \
(module "%(module)s")', (module "%(module)s")',
level=logging.INFO, level=logging.INFO,
) )
logger = logging.getLogger(__name__) main()
arguments = parse_arguments()
main(arguments)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment