#!/bin/tcsh -f
#
# gtfcount - summarise the transcript_id / gene_id attributes of a GTF file.
#
# Usage: gtfcount [-g][-c][-l][-f] <gtfile>
#   (no option)  print the number of distinct transcript_id values
#   -g           print the number of distinct gene_id values
#   -l           print the sorted list of distinct transcript_id values
#   -c           print the distinct transcript_id count per value of column 1
#   -f           print the distinct transcript_id count per gene_id
#   -h           print the usage text and exit with status 1
#
# Only the first argument is examined as an option. When no file argument
# is given, perl receives no file operand and reads standard input instead.

# First argument, or the empty string when the script is run without any.
set opt = ""
if ($#argv > 0) then
    set opt = $argv[1]:q
endif

# The "x" prefix keeps tcsh from parsing a value such as -f as a file
# inquiry operator inside the expression.
if ("x$opt" == "x-h") then
    echo "Usage: gtfcount [-g][-c][-l][-f] <gtfile>"
    # Two echo commands instead of an embedded \n: whether echo interprets
    # backslash escapes depends on the echo_style setting.
    echo " Show the count of distinct transcript names."
    echo " Options:"
    echo " -g : report distinct gene names count (gene_id attribute)"
    echo " -c : report transcript counts per reference sequence"
    echo " -l : list transcripts"
    echo " -f : report transcript counts per gene_id"
    exit 1
endif

# Map the option (if any) to a report mode; anything else is taken to be
# the input file and selects the default transcript count.
set mode = "count"
if ("x$opt" == "x-g") set mode = "genes"
if ("x$opt" == "x-l") set mode = "list"
if ("x$opt" == "x-c") set mode = "perref"
if ("x$opt" == "x-f") set mode = "pergene"
if ("$mode" != "count") shift

# Hold the input file as a (possibly empty) word list so that it can be
# passed with :q - safe for blanks and glob characters in the path - while
# expanding to no word at all when the file was omitted.
set gtf = ()
if ($#argv > 0) then
    set gtf = ($argv[1]:q)
endif

# All modes use the same attribute pattern: \b stops a longer attribute
# name that merely ends in gene_id / transcript_id from matching, and \s+
# tolerates any whitespace between the attribute name and its quoted value.
if ("$mode" == "genes") then
    perl -ne 'print "$1\n" if m/\bgene_id\s+"([^"]+)/' $gtf:q | sort -u | wc -l
else if ("$mode" == "list") then
    perl -ne 'print "$1\n" if m/\btranscript_id\s+"([^"]+)/' $gtf:q | sort -u
else if ("$mode" == "perref") then
    # Emit "column1<TAB>transcript" pairs, deduplicate them, then count
    # how many distinct transcripts remain for each column-1 value.
    perl -ne '@t=split(/\t/);print "$t[0]\t$1\n" if $t[8]=~m/\btranscript_id\s+"([^"]+)/' $gtf:q | sort -u | cut -f1 | uniq -c
else if ("$mode" == "pergene") then
    # Same idea keyed by gene_id; defined() rather than a truth test so
    # that an id of "0" is still counted.
    perl -ne '($g)=(m/\bgene_id\s+"([^"]+)/); ($t)=(m/\btranscript_id\s+"([^"]+)/); print "$g\t$t\n" if defined($g) && defined($t)' $gtf:q | sort -u | cut -f1 | uniq -c
else
    perl -ne 'print "$1\n" if m/\btranscript_id\s+"([^"]+)/' $gtf:q | sort -u | wc -l
endif