I have a file which looks like this:
80,1p21
81,19q13
82,6p12.3
83,Xp11.22
84,3pter-q21
86,3q26.33
87,14q24.1-q24.2|14q24|14q22-q24
88,1q42-q43
8
I actually found the answer to this. The code looks a bit complicated though.
#!/usr/bin/env perl
use strict;
use warnings;
# main - sort "entrez_gene,locus" records by cytogenetic locus and print them.
#
# Input : exactly one command-line argument (read from @ARGV), the path of a
#         text file with one record per line, e.g. "82,6p12.3".
# Output: one "entrez_gene,locus" line per accepted record on the currently
#         selected output handle, ordered by chromosome number (numeric),
#         then arm (p before q), then band label (string order). Records
#         sharing a locus keep their input order.
# Dies  : with a usage message when the argument count is wrong, or with the
#         system error when the file cannot be opened.
#
# Lines that are not "<digits>,<locus>" and loci that are not
# "<digits><p|q><band>" (for example "Xp11.22" or a bare "8") are skipped.
sub main {
    if (@ARGV != 1) {
        die "Usage: perl hashofhash_sort.pl <input_file>\n";
    }
    my $file = $ARGV[0];

    open(my $in, '<', $file)
        or die "Error!! Cannot open the $file file: $!\n";

    # chromosome number => arm (p/q) => band label => [Entrez gene IDs].
    # Keying by band makes each locus unique per arm, so no separate
    # de-duplication pass (and no cross-arm "seen" bookkeeping) is needed.
    my %genes_at;

    while (my $line = <$in>) {
        # Strip the line ending (including a CR from CRLF files) and any
        # other trailing whitespace so it cannot leak into the locus.
        $line =~ s/\s+\z//;

        # Entrez gene ID, then the locus, e.g. "12p23.4".
        next unless $line =~ /\A\s*(\d+),(.+)/;
        my ($entrez_gene, $loci) = ($1, $2);

        # Chromosome number, arm (p or q), and the rest of the band label;
        # for "12p23.4" these are 12, p and 23.4. Variables are scoped to
        # this iteration so a non-matching line can never reuse values
        # captured from an earlier one.
        next unless $loci =~ /\A(\d+)([pq])(.+)/;
        my ($chr, $arm, $band) = ($1, $2, $3);

        push @{ $genes_at{$chr}{$arm}{$band} }, $entrez_gene;
    }
    close($in);

    # The string tie-break keeps the order deterministic when two keys are
    # numerically equal (e.g. "1" and "01").
    for my $chr (sort { $a <=> $b or $a cmp $b } keys %genes_at) {
        for my $arm (sort keys %{ $genes_at{$chr} }) {
            # Band labels are compared as strings, not numbers: they include
            # values such as "ter-q21" and "42-q43" that are not numeric.
            for my $band (sort keys %{ $genes_at{$chr}{$arm} }) {
                for my $entrez_gene (@{ $genes_at{$chr}{$arm}{$band} }) {
                    print "$entrez_gene,$chr$arm$band\n";
                }
            }
        }
    }

    return;
} # End of main
# Run the script: main() reads the input file path from @ARGV.
main();