Skip to content
Snippets Groups Projects
xp-count-reads-ribseq.pl 1.06 KiB
Newer Older
use Data::Dumper;

my %hash  = ();
my %anno  = ();
my $readsSum = 0;

open(SAM,$ARGV[0]);

#Annotation file included with beginning and end of every CDS
my $annoFile = (defined($ARGV[1])) ? 1 : 0;

if($annoFile){
	open(AN,$ARGV[1]);
	while( $line = <AN> ){
		chomp $line;
		my @F = split /\t/ , $line; #annotation line
		my @a = ($F[1],$F[2]);
		$anno{$F[0]} = \@a;
	}
} else {
	print STDERR "Annotation file not specified, using len(5UTR)=200\n";
}

#print Dumper(%anno);

#Parse SAM file and count reads
while( $line = <SAM> ){

	chomp $line;
	next if $line =~ /^@/; #discard headers/comments

	my @F = split /\t/ , $line; #sam line

	my $gene_name = $F[2];
        if($F[1] == 0) { #only consider hits in plus strand
		if($annoFile){
			if(($F[3] >= $anno{$gene_name}->[0]-15) & ($F[3] <= $anno{$gene_name}->[1]-15)) { #close to start and stop
				$hash{$gene_name}++;
			}
		} {	
			if($F[3] >= 186) { #consider that len(5UTR)==200
				$hash{$gene_name}++;
			}
		}
	}
}

#print Dumper(\%hash);
print "gene\tcounts\n";
for $key (keys %hash){
	print $key,"\t",$hash{$key},"\n";
}