use Data::Dumper; my %hash = (); my %anno = (); my $readsSum = 0; open(SAM,$ARGV[0]); #Annotation file included with beginning and end of every CDS my $annoFile = (defined($ARGV[1])) ? 1 : 0; if($annoFile){ open(AN,$ARGV[1]); while( $line = <AN> ){ chomp $line; my @F = split /\t/ , $line; #annotation line my @a = ($F[1],$F[2]); $anno{$F[0]} = \@a; } } else { print STDERR "Annotation file not specified, using len(5UTR)=200\n"; } #print Dumper(%anno); #Parse SAM file and count reads while( $line = <SAM> ){ chomp $line; next if $line =~ /^@/; #discard headers/comments my @F = split /\t/ , $line; #sam line my $gene_name = $F[2]; if($F[1] == 0) { #only consider hits in plus strand if($annoFile){ if(($F[3] >= $anno{$gene_name}->[0]-15) & ($F[3] <= $anno{$gene_name}->[1]-15)) { #close to start and stop $hash{$gene_name}++; } } { if($F[3] >= 186) { #consider that len(5UTR)==200 $hash{$gene_name}++; } } } } #print Dumper(\%hash); print "gene\tcounts\n"; for $key (keys %hash){ print $key,"\t",$hash{$key},"\n"; }