my %where = ('author_last_name'=>'Cantarel');$sub = 'IN (select papers.pubmed_id from papers inner join pubmed
using(pubmed_id) inner join journals using(journal_id) where journal_name like '%informatics' or journal_name like 'Molecular%')';
$where{'papers.pubmed_id'} = \$sub;
$q = $author_rs->search(\%where,
{
join=>'papers',
select => ['journal_id',{count => 'pubmed_id'}],
as => [qw/journal_id numpubs/],
group_by => [qw/journal_id/]
});
Tuesday, December 1, 2009
reading files from a dir in PERL
I always forget the silly syntax for this, so here it is:
#!/usr/bin/perl -w
opendir(DIR, ".");
@files = grep(/\.fasta$/,readdir(DIR));
closedir(DIR);
foreach $file (@files) {
#do something}
Friday, November 6, 2009
BIO::SeachIO to Bio::SeqIO
Here is how one would populate a Seq Object using a Search Object:
use Bio::SearchIO;
use Bio::SeqIO;
use Bio::Seq::RichSeq;
use Bio::SeqIO;
use Bio::Seq::RichSeq;
my $dna_file = shift @ARGV;
my $fasta_report = shift @ARGV;
my $outfile = shift @ARGV;
my $prefix = shift @ARGV;
my $seqio_object = Bio::SeqIO->new(-file => $dna_file);my %seq = (); my $seq_in = Bio::SeqIO->new('-file' => "<$dna_file",
'-format' => 'fasta');my $seq_out = Bio::SeqIO->new('-file' => ">$outfile",
'-format' => 'yourfavoriteformat');
while (my $inseq = $seq_in->next_seq) {
$seq{$inseq->id()} = $inseq->seq();}my %seen;
my $j = 0;
W1:while (my $r = $in->next_result) {
my $qname = (split(/\|/, $r->query_name))[0]; $j++; my $newacc = $prefix.sprintf("%06s",$j); my $dna_seq =$seq{$r->query_name};
$newseq = Bio::Seq::RichSeq->new(-seq=>$cdsdna,
-accession_number=>$newacc,
-locus=>$newacc,
-primary_id=>$newacc,
-display_id=>$newacc
);
my $i = 0;
W2:while (my $h = $r->next_hit) {
W3:while (my $hsp = $h->next_hsp) {
if ($seen{$qname}) {
foreach (@{$seen{$qname}}) {
next W2 if ($hsp->start('query') >= $_->{begin} && $hsp->start('query') <= $_->{end});
next W2 if ($hsp->end('query') >= $_->{begin} && $hsp->end('query') <= $_->{end});
}
} my $fasta_pseq = $hsp->seq_str;
$fasta_pseq =~ s/\\|\/|-//g; my $did = $qname.'|'.$r->algorithm."|cds".$i;
my $feat = Bio::SeqFeature::Generic->new(-start=>$hsp->start('query'),-end=>$hsp->end('query'),
-strand=>$hsp->strand('query'),-primary=>'CDS',-source=>'fastx',
-display_name=>$did,-score=>$hsp->bits,
-frame=>0,-tag=>{translation=>$fasta_pseq}
); $newseq->add_SeqFeature($feat);
} }}
$out->write_seq($newseq);
Subscribe to:
Posts (Atom)