#!/bin/tcsh -f
#
# gtfcount: count or list the distinct identifiers found in a GTF file.
#
# With no option, prints the number of distinct transcript_id values.
# At most one option is recognized and it must be the first argument;
# the GTF file name follows it. Run with -h for the list of options.
#
# Notes for maintainers:
#  * Option tests keep the first argument inside the double quotes, so an
#    argument holding blanks or glob characters cannot break the expression.
#  * The file name is handed to perl with the :q modifier, so a path with
#    blanks or glob characters arrives as one unexpanded word. When no file
#    name is given the substitution yields no word at all and perl -n then
#    reads the records from standard input.
#  * Attribute names are anchored at a word boundary so that longer names
#    ending the same way (ref_gene_id, orig_transcript_id) are not mistaken
#    for gene_id or transcript_id.
#  * Every mode relies on sort -u to collapse the repeated feature lines
#    (exon, CDS, ...) that belong to one transcript.

if ("x$1" == "x-h") then
    echo "Usage: gtfcount [-g|-c|-C|-l|-Cl|-f] <gtfile>"
    echo " Show the count of distinct transcript names.\n Options:"
    echo " -g : report distinct gene names count (gene_id attribute)"
    echo " -c : report transcript counts per reference sequence"
    echo " -C : show count of coding transcripts"
    echo " -l : list transcripts"
    echo " -Cl : list coding transcripts"
    echo " -f : report transcript counts per gene_id"
    exit 1
endif

# -g : number of distinct gene_id values
if ("x$1" == "x-g") then
    shift
    perl -ne 'print "$1\n" if m/\bgene_id\s+"([^"]+)/' $1:q | sort -u | wc -l
    exit
endif

# -l : sorted list of distinct transcript_id values
if ("x$1" == "x-l") then
    shift
    perl -ne 'print "$1\n" if m/\btranscript_id\s+"([^"]+)/' $1:q | sort -u
    exit
endif

# -Cl : sorted list of transcripts having at least one CDS feature line
if ("x$1" == "x-Cl") then
    shift
    perl -ne 'print "$1\n" if m/\tCDS\t/ && m/\btranscript_id\s+"([^"]+)/' $1:q | sort -u
    exit
endif

# -c : transcripts per reference sequence; distinct (column 1, transcript)
#      pairs are reduced to column 1 and then tallied by uniq -c
if ("x$1" == "x-c") then
    shift
    perl -ne '@t=split(/\t/);print "$t[0]\t$1\n" if $t[8]=~m/\btranscript_id\s+"([^"]+)/' $1:q | sort -u | cut -f1 | uniq -c
    exit
endif

# -C : number of transcripts having at least one CDS feature line
if ("x$1" == "x-C") then
    shift
    perl -ne 'print "$1\n" if m/\tCDS\t/ && m/\btranscript_id\s+"([^"]+)/' $1:q | sort -u | wc -l
    exit
endif

# -f : transcripts per gene; distinct (gene, transcript) pairs are reduced
#      to the gene column and tallied. The ids are tested with defined
#      rather than for truth so that an id equal to 0 is still counted.
if ("x$1" == "x-f") then
    shift
    perl -ne '($g)=(m/\bgene_id\s+"([^"]+)/); ($t)=(m/\btranscript_id\s+"([^"]+)/); print "$g\t$t\n" if defined $g && defined $t' $1:q | sort -u | cut -f1 | uniq -c
    exit
endif

# default: number of distinct transcript_id values
perl -ne 'print "$1\n" if m/\btranscript_id\s+"([^"]+)/' $1:q | sort -u | wc -l