Friday, November 6, 2009

BIO::SeachIO to Bio::SeqIO


Here is how one would populate a Seq Object using a Search Object:


use Bio::SearchIO;
use Bio::SeqIO;
use Bio::Seq::RichSeq;

my $dna_file = shift @ARGV;
my $fasta_report = shift @ARGV;
my $outfile = shift @ARGV;
my $prefix = shift @ARGV;

my $seqio_object = Bio::SeqIO->new(-file => $dna_file);
my %seq = (); 
my $seq_in = Bio::SeqIO->new('-file' => "<$dna_file",
                             '-format' => 'fasta');
my $seq_out = Bio::SeqIO->new('-file' => ">$outfile",
                              '-format' => 'yourfavoriteformat');

while (my $inseq = $seq_in->next_seq) {
     $seq{$inseq->id()} = $inseq->seq();
}
my %seen;
my $j = 0;
W1:while (my $r = $in->next_result) {
  my $qname = (split(/\|/, $r->query_name))[0];
  $j++;
  my $newacc = $prefix.sprintf("%06s",$j);
  my $dna_seq =$seq{$r->query_name};  
  $newseq = Bio::Seq::RichSeq->new(-seq=>$cdsdna,
         -accession_number=>$newacc,
         -locus=>$newacc,
         -primary_id=>$newacc,
         -display_id=>$newacc
        );

   my $i = 0;
W2:while (my $h = $r->next_hit) {
 W3:while (my $hsp = $h->next_hsp) {
  if ($seen{$qname}) {
      foreach (@{$seen{$qname}}) {
           next W2 if ($hsp->start('query') >= $_->{begin} 
               && $hsp->start('query') <= $_->{end});
           next W2 if ($hsp->end('query') >= $_->{begin} 
               && $hsp->end('query') <= $_->{end});
        }
   } 
    my $fasta_pseq = $hsp->seq_str;
       $fasta_pseq =~ s/\\|\/|-//g;
                my $did = $qname.'|'.$r->algorithm."|cds".$i;
                my $feat = Bio::SeqFeature::Generic->new(-start=>$hsp->start('query'),-end=>$hsp->end('query'),
        -strand=>$hsp->strand('query'),-primary=>'CDS',-source=>'fastx',
        -display_name=>$did,-score=>$hsp->bits,
        -frame=>0,-tag=>{translation=>$fasta_pseq}
       );
                $newseq->add_SeqFeature($feat);

            }
       }
} 
$out->write_seq($newseq);