Skip to content
Snippets Groups Projects
Snakefile 36.3 KiB
Newer Older
BIOPZ-Bak Maciej's avatar
BIOPZ-Bak Maciej committed
                params.log_dir,
                "paired_end",
                "*"))
        for s in paired_end_dir:
            sample_name = s.split("/")[-1]
            shutil.copytree(
                s, \
                os.path.join(
                    params.log_dir,
                    "samples",
                    sample_name))
        shutil.rmtree(
            os.path.join(
                params.log_dir,
                "paired_end"),
            ignore_errors=False,
            onerror=None)
        # move single end results
        single_end_dir = glob.glob(
            os.path.join(
                params.log_dir,
                "single_end",
                "*"))
        for s in single_end_dir:
            sample_name = s.split("/")[-1]
            shutil.copytree(
                s, \
                os.path.join(
                    params.log_dir,
                    "samples",
                    sample_name))
        shutil.rmtree(
            os.path.join(
                params.log_dir,
                "single_end"),
            ignore_errors=False,
            onerror=None)

        # encapsulate salmon quantification results
        all_samples_dirs = glob.glob(
            os.path.join(
                params.results_dir,
                "samples",
                "*"))
        for s in all_samples_dirs:
            sample_name = s.split("/")[-1]
            shutil.move(
                os.path.join(
                    s,
                    "salmon_quant"),
                os.path.join(
                    s,
                    sample_name)
                )
            os.mkdir(os.path.join(
                s,
                "salmon_quant"))
            shutil.move(
                os.path.join(
                    s,
                    sample_name),
                os.path.join(
                    s,
                    "salmon_quant",
                    sample_name)
                )

        # adjust FastQC results 'Filename' field:
        fastq_zip_list = glob.glob(
            os.path.join(
                params.results_dir,
                "samples",
                "*",
                "*_fastqc",
                "*_fastqc.zip"))
        for zipfile in fastq_zip_list:
            sample_name = zipfile.split("/")[-3]
            zipfile_path_chunks = zipfile.split("/")
            new_path = os.path.join(*(zipfile_path_chunks[:-1]))
            with ZipFile(zipfile, 'r') as zip_f:
                zip_f.extractall(new_path)
            fastqc_data_f = os.path.join(
                zipfile[:-4],
                "fastqc_data.txt")
            with open(fastqc_data_f) as f:
                log_lines = f.read().splitlines()
            log_lines[3] = "Filename\t" + sample_name+"|"+log_lines[3].split("\t")[1]
            with open(fastqc_data_f, "w") as f:
                for i in log_lines: f.write(i+"\n")
            os.remove(zipfile)

        # adjust Kallisto quantification logs
        kallisto_logs = glob.glob(
            os.path.join(
                params.log_dir,
                "samples",
                "*",
                "genome_quantification_kallisto.stderr.log"))
        for kallisto_log in kallisto_logs:
            with open(kallisto_log) as f:
                log_lines = f.read().splitlines()
                temp = log_lines[8].split(".")
            log_lines[8] = temp[0] + "." + temp[2] + "." + temp[3]
            with open(kallisto_log+".MODIFIED", "w") as f:
                for i in log_lines: f.write(i+"\n")

        # add #-comment to all cutadapt logs:
        cutadapt_logs = glob.glob(
            os.path.join(
                params.log_dir,
                "samples",
                "*",
                "remove_*_cutadapt.stdout.log"))
        for cutadapt_log in cutadapt_logs:
            sample_name = cutadapt_log.split("/")[-2]
            with open(cutadapt_log) as f:
                log_lines = f.read().splitlines()
            log_lines[1] = log_lines[1] + " # " + sample_name
            with open(cutadapt_log, "w") as f:
                for i in log_lines: f.write(i+"\n")

        # adjust TIN boxplots filenames for MutliQC recognition
        os.rename(
            input.TIN_boxplot_PNG,
            os.path.join(
                params.results_dir,
                "TIN scores_mqc.png"))
        os.rename(
            input.TIN_boxplot_PDF,
            os.path.join(
                params.results_dir,
                "TIN scores_mqc.pdf"))


rule prepare_MultiQC_config:
    '''
        Prepare config for the MultiQC
    '''
    input:
        multiqc_input_dir = os.path.join(
            "{output_dir}",
            "samples")
    output:
        multiqc_config = os.path.join(
            "{output_dir}",
            "MultiQC_config.yaml")
    params:
        logo_path = os.path.join(
            "..",
            "..",
            "images",
            "logo.128px.png"),
        results_dir = config["output_dir"]
    log:
        LOG_local_log = \
            os.path.join("{output_dir}", "local_log", \
                "prepare_MultiQC_config.log")
    run:
        with open(output.multiqc_config, "w") as YAML:
            YAML.write("---\n\n")
            YAML.write("title: \"Rhea\"\n")
            YAML.write("subtitle: \"RNA-Seq processing pipeline developed by the members of Zavolan Lab\"\n")
            YAML.write("intro_text: \"Short analysis title from config[analysis_title]\"\n")
            YAML.write("custom_logo: \""+params.logo_path+"\"\n")
            YAML.write("custom_logo_url: \"https://www.biozentrum.unibas.ch/research/researchgroups/overview/unit/zavolan/research-group-mihaela-zavolan/\"\n")
            YAML.write("custom_logo_title: \"ZavoLab\"\n\n")
            YAML.write("report_header_info:\n")
            YAML.write("  - Project Type: \"Snakemake workflow\"\n")
            YAML.write("  - Analysis Type: \"RNA-seq\"\n")
            YAML.write("  - Analysis Author: \"config[author_name]\"\n")
            YAML.write("  - Contact E-mail: \"config[author_email]\"\n\n")
            YAML.write("top_modules:\n\n")
            YAML.write("  - fastqc:\n")
            YAML.write("      path_filters:\n")
            YAML.write("      - \"*/mate1_fastqc/*\"\n")
            YAML.write("      - \"*/mate2_fastqc/*\"\n")            
            YAML.write("\n")
            YAML.write("  - cutadapt:\n")
            YAML.write("      name: \"Cutadapt: adapter removal\"\n")
            YAML.write("      path_filters:\n")
            YAML.write("      - \"*/remove_adapters_cutadapt.stdout.log\"\n")
            YAML.write("\n")
            YAML.write("  - cutadapt:\n")
            YAML.write("      name: \"Cutadapt: polyA tails removal\"\n")
            YAML.write("      path_filters:\n")
            YAML.write("      - \"*/remove_polya_cutadapt.stdout.log\"\n")
            YAML.write("\n")
            YAML.write("  - star:\n")
            YAML.write("      path_filters:\n")
            YAML.write("      - \"*/map_genome/*\"\n")
            YAML.write("\n")
            YAML.write("  - TIN_scores:\n")
            YAML.write("      path_filters:\n")
            YAML.write("      - \"*/TIN scores_mqc.png\"\n")
            YAML.write("\n")  
            YAML.write("  - salmon:\n")
            YAML.write("      path_filters:\n")
            YAML.write("      - \"*/salmon_quant/*\"\n")
            YAML.write("\n")
            YAML.write("  - kallisto:\n")
            YAML.write("      path_filters:\n")
            YAML.write("      - \"*/genome_quantification_kallisto.stderr.log.MODIFIED\"\n")
            YAML.write("\n")
            YAML.write("...")


rule MULTIQC_report:
    '''
        Create report with MultiQC
    '''
    input:
        multiqc_config = os.path.join(
            config["output_dir"],
            "MultiQC_config.yaml")
    output:
        MultiQC_report = \
            directory(os.path.join("{output_dir}", "multiqc_summary"))
    params:
        results_dir = config["output_dir"],
        log_dir = config["log_dir"]
    log:
        LOG_local_log = \
            os.path.join("{output_dir}", "local_log", \
                "MULTIQC_report.log")
    singularity:
        "docker://ewels/multiqc:1.7"
    shell:
        """
        multiqc \
        --outdir {output.MultiQC_report} \
        --config {input.multiqc_config} \
        {params.results_dir} \
        {params.log_dir} \
        &> {log.LOG_local_log};
        """