package MaXMLParser; # # Release Date: 2001/10/04 # Written by KASUKAWA Takeya (kasukawa@gsc.riken.go.jp) # =head1 NAME MaXMLParser.pm - Parser for MaXML data =head1 SYNOPSIS use MaXMLParser; my $result = MaXMLParser->get_entry(['0610005A07'], undef); my $results = MaXMLParser->get_entries(['0610005K03'], ['gene_name']); MaXMLParser->set_base_url('http://fantom2.gsc.riken.go.jp/db/'); =head1 DESCRIPTION This module provide a class method to get and parse MaXML data from the FANTOM distribution site. =cut use strict; use vars qw($parsernb $maxmlurl); use LWP::UserAgent; use HTTP::Request; use XML::Parser; BEGIN { $maxmlurl = 'http://fantom2.gsc.riken.go.jp/db/maxml/maxmlseq.cgi'; } =head1 METHODS =head2 Get annotation $results = MaXMLParser->get_entries($idref, $qualifiers, $idtype) Get MaXML data from the FANTOM distribution server. $idref: reference to an array of IDs. $qualifiers: reference to an array of qualifiers of annotations to get if $qualifiers is undef, all annotation will be get. $idtype: type of IDs in $idref (e.g. seqid, cloneid, ..). if $idtype is undef, IDs in $idref are treated as clone IDs. return value ($results): $results: reference to an array of $result described below $result: annotations $result->{seqid}: sequence ID $result->{cloneid}: clone ID $result->{accession}: DDBJ accession $result->{mtime}: last modified time $result->{anndata}: reference to an array of annotation data $result->{anndata}->[0..]->{qualifier}: qualifier of each annotation data $result->{anndata}->[0..]->{anntext}: annotation text $result->{anndata}->[0..]->{datasrc}: data source name $result->{anndata}->[0..]->{srckey}: source key $result->{anndata}->[0..]->{evidence}: evidence code if no annotation are found, undef will be returned. =cut sub get_entry { my $results = get_entries(@_); if (!defined $results) { return undef; } $results->[0]; } sub get_entries { my $class = shift; my ($idref, $qualifiers, $idtype) = @_; my $ua = new LWP::UserAgent; $ua->agent('MaXMLParser/0.1 ' . $ua->agent); my $url = $maxmlurl . '?style=xml'; if (!defined $idtype) { $url .= '&masterid='.join(',', @$idref); } else { $url .= '&idtype=' . $idtype; $url .= '&id='.join(',', @$idref); } if (defined $qualifiers) { $url .= '&qualifier=' . join(',', @$qualifiers); } my $request = HTTP::Request->new('GET', $url); my $parser = new XML::Parser; $parser->setHandlers(Init=>\&newann_init, Final=>\&newann_final, Start=>\&newann_start, End=>\&newann_end, Char=>\&newann_char); sub newann_init { my ($expat) = @_; $expat->{my_inanns} = 0; $expat->{my_inread} = 1; $expat->{my_text} = ''; $expat->{my_annotation} = undef; $expat->{my_anndata} = undef; $expat->{my_results} = []; $expat->{my_result} = {}; $expat->{my_exist} = 0; } sub newann_final { my ($expat) = @_; my ($results, $exist) = ($expat->{my_results}, $expat->{my_exist}); ($results, $exist); } sub newann_start { my ($expat, $name) = @_; $expat->{my_inread} = 0 if ($name eq 'histories'); if ($expat->{my_inread}) { if ($name eq 'annotations') { $expat->{my_inanns} = 1; } } } sub newann_end { my ($expat, $name) = @_; if (!($expat->{my_inread})) { $expat->{my_inread} = 1 if ($name eq 'histories'); $expat->{my_text} = ''; return; } my $text = $expat->{my_text}; $expat->{my_text} = ''; $text =~ s/^\s*//; $text =~ s/\s*$//; if ($expat->{my_inanns}) { if ($name eq 'annotations') { $expat->{my_inanns} = 0; } elsif ($name eq 'annotation') { &process_anndata($expat->{my_result}, $expat->{my_anndata}); $expat->{my_anndata} = undef; } else { $expat->{my_anndata}->{$name} = $text if ($text ne ''); } } else { if ($name eq 'sequence') { &process_annotation($expat->{my_result}, $expat->{my_annotation}); push(@{$expat->{my_results}}, $expat->{my_result}); $expat->{my_result} = {}; $expat->{my_exist} = 1; } else { $name = 'mtime' if ($name eq 'modified_time'); $expat->{my_annotation}->{$name} = $text if ($text ne ''); } } } sub newann_char { my ($expat, $char) = @_; $expat->{my_text} .= $char; } sub process_annotation { my ($result, $data) = @_; foreach my $i (keys %$data) { $$data{$i} =~ s/\\n/\n/g; $$data{$i} = '' if ($$data{$i} =~ /^\s*$/); $result->{$i} = $$data{$i}; } } sub process_anndata { my ($result, $data) = @_; my $curanndata = {}; foreach my $i (keys %$data) { $$data{$i} =~ s/\\n/\n/g; $$data{$i} = '' if ($$data{$i} =~ /^\s*$/); $curanndata->{$i} = $$data{$i}; } push(@{$result->{anndata}}, $curanndata); } $parsernb = $parser->parse_start(); my $uares = $ua->request($request, \&parse); sub parse { my ($data, $response, $method) = @_; $parsernb->parse_more($data); } my ($results, $exist) = $parsernb->parse_done(); undef $parsernb; if ($uares->is_error) { die 'Failed to get data'; } if (!$exist) { return undef; } $results; } =head2 Set base URL MaXMLParser->set_base_url($url) Set BASE URL of viewer to get MaXML data =cut sub set_base_url { my $class = shift; my ($url) = @_; $url =~ s/\/$//; $url =~ s/\/maxmlseq.cgi$//; $url =~ s/\/maxml$//; $maxmlurl = $url . '/maxml/maxmlseq.cgi'; } 1;