Tuesday, December 1, 2009

Doin' a subselect in DBIx::Class

my %where = ('author_last_name'=>'Cantarel');$sub = 'IN (select papers.pubmed_id from papers inner join pubmed
using(pubmed_id) inner join journals using(journal_id) where  journal_name like '%informatics' or journal_name like 'Molecular%')';
$where{'papers.pubmed_id'}  = \$sub;

$q = $author_rs->search(\%where,
                  {
                   join=>'papers',
                   select   => ['journal_id',{count => 'pubmed_id'}],
                   as       => [qw/journal_id numpubs/],
                   group_by => [qw/journal_id/]
                  });

reading files from a dir in PERL

I always forget the silly syntax for this, so here it is:

#!/usr/bin/perl -w

opendir(DIR, ".");
@files = grep(/\.fasta$/,readdir(DIR));
closedir(DIR);

foreach $file (@files) {
   #do something
}

Friday, November 6, 2009

BIO::SeachIO to Bio::SeqIO


Here is how one would populate a Seq Object using a Search Object:


use Bio::SearchIO;
use Bio::SeqIO;
use Bio::Seq::RichSeq;

my $dna_file = shift @ARGV;
my $fasta_report = shift @ARGV;
my $outfile = shift @ARGV;
my $prefix = shift @ARGV;

my $seqio_object = Bio::SeqIO->new(-file => $dna_file);
my %seq = (); 
my $seq_in = Bio::SeqIO->new('-file' => "<$dna_file",
                             '-format' => 'fasta');
my $seq_out = Bio::SeqIO->new('-file' => ">$outfile",
                              '-format' => 'yourfavoriteformat');

while (my $inseq = $seq_in->next_seq) {
     $seq{$inseq->id()} = $inseq->seq();
}
my %seen;
my $j = 0;
W1:while (my $r = $in->next_result) {
  my $qname = (split(/\|/, $r->query_name))[0];
  $j++;
  my $newacc = $prefix.sprintf("%06s",$j);
  my $dna_seq =$seq{$r->query_name};  
  $newseq = Bio::Seq::RichSeq->new(-seq=>$cdsdna,
         -accession_number=>$newacc,
         -locus=>$newacc,
         -primary_id=>$newacc,
         -display_id=>$newacc
        );

   my $i = 0;
W2:while (my $h = $r->next_hit) {
 W3:while (my $hsp = $h->next_hsp) {
  if ($seen{$qname}) {
      foreach (@{$seen{$qname}}) {
           next W2 if ($hsp->start('query') >= $_->{begin} 
               && $hsp->start('query') <= $_->{end});
           next W2 if ($hsp->end('query') >= $_->{begin} 
               && $hsp->end('query') <= $_->{end});
        }
   } 
    my $fasta_pseq = $hsp->seq_str;
       $fasta_pseq =~ s/\\|\/|-//g;
                my $did = $qname.'|'.$r->algorithm."|cds".$i;
                my $feat = Bio::SeqFeature::Generic->new(-start=>$hsp->start('query'),-end=>$hsp->end('query'),
        -strand=>$hsp->strand('query'),-primary=>'CDS',-source=>'fastx',
        -display_name=>$did,-score=>$hsp->bits,
        -frame=>0,-tag=>{translation=>$fasta_pseq}
       );
                $newseq->add_SeqFeature($feat);

            }
       }
} 
$out->write_seq($newseq);