Collect all TPM values from salmon
################################
### Filter salmon (TPM)
################################################################################
# Per-sample rule: read the salmon gene-level table "quant.genes.sf" and write
# a tab-separated file that keeps only its "Name" and "TPM" columns.
rule filter_salmon_TPM:
    input:
        salmon_genes_out = os.path.join(
            config["output_dir"], "{sample}", "salmon", "quant_reads", "quant.genes.sf"
        ),
    output:
        salmon_filtered = os.path.join(
            config["output_dir"], "{sample}", "salmon", "quant_reads", "quant.genes.TPM.tsv"
        )
    log:
        os.path.join(config["local_log"], "filter_salmon_TPM_{sample}.log")
    run:
        # The first line of the input file is treated as the column header.
        quant_table = pd.read_csv(input.salmon_genes_out, sep="\t", header=0)
        # "Name" becomes the index, so it is written as the first column.
        tpm_by_name = quant_table.loc[:, ["Name", "TPM"]].set_index("Name")
        tpm_by_name.to_csv(output.salmon_filtered, sep="\t", header=True, index=True)
################################################################################
### Collect all salmon reads TPM
################################################################################
# Merge the per-sample "quant.genes.TPM.tsv" tables into a single tab-separated
# table: a "Name" column followed by one column per sample, each named after
# its sample. Rows are matched on "Name" with pd.merge's default (inner) join,
# so only names present in every sample table are kept.
rule collect_salmon_reads_TPM:
    input:
        salmon_filtered = expand(
            os.path.join(
                config["output_dir"], "{sample}", "salmon", "quant_reads", "quant.genes.TPM.tsv"
            ),
            sample=get_samples(),
        )
    output:
        salmon_filtered_all = os.path.join(
            config["output_dir"], "salmon", "quant_reads", "quant.genes.TPM.tsv"
        )
    params:
        # Built from the same get_samples() call as the inputs, so the i-th
        # name labels the i-th input file.
        sample_name = expand("{sample}", sample=get_samples())
    log:
        os.path.join(config["local_log"], "collect_salmon_reads_TPM.log")
    run:
        # None marks "nothing merged yet". DataFrame.empty must not be used for
        # this: it is also True for a table that has columns but zero rows, in
        # which case the accumulated sample columns would be silently replaced
        # by the next table.
        df_merge = None
        for tsv_path, sample in zip(input.salmon_filtered, params.sample_name):
            df = pd.read_csv(tsv_path, header=0, sep="\t")
            # Rename the value column to the sample name so columns stay
            # distinguishable after merging.
            df.columns = ["Name", sample]
            if df_merge is None:
                df_merge = df
            else:
                df_merge = pd.merge(df_merge, df, on="Name")
        if df_merge is None:
            # No input files at all: still produce the declared output file.
            df_merge = pd.DataFrame()
        df_merge.to_csv(output.salmon_filtered_all, header=True, sep="\t", index=False)