package MaXMLParser;

#
# Release Date: 2001/10/04
#  Written by KASUKAWA Takeya (kasukawa@gsc.riken.go.jp)
#

=head1 NAME

MaXMLParser.pm - Parser for MaXML data

=head1 SYNOPSIS

 use MaXMLParser;
 my $result  = MaXMLParser->get_entry(['0610005A07'], undef);
 my $results = MaXMLParser->get_entries(['0610005K03'], ['gene_name']);

 MaXMLParser->set_base_url('http://fantom2.gsc.riken.go.jp/db/');

=head1 DESCRIPTION

This module provide a class method to get and parse MaXML data from the FANTOM distribution site.

=cut

use strict;
use vars qw($parsernb $maxmlurl);
use LWP::UserAgent;
use HTTP::Request;
use XML::Parser;

BEGIN {
    $maxmlurl = 'http://fantom2.gsc.riken.go.jp/db/maxml/maxmlseq.cgi';
}

=head1 METHODS

=head2 Get annotation

 $results = MaXMLParser->get_entries($idref, $qualifiers, $idtype)

 Get MaXML data from the FANTOM distribution server.

 $idref:      reference to an array of IDs.
 $qualifiers: reference to an array of qualifiers of annotations to get
   if $qualifiers is undef, all annotation will be get.
 $idtype:     type of IDs in $idref (e.g. seqid, cloneid, ..).
   if $idtype is undef, IDs in $idref are treated as clone IDs.

 return value ($results):
  $results: reference to an array of $result described below

  $result: annotations
  $result->{seqid}: sequence ID
  $result->{cloneid}: clone ID
  $result->{accession}: DDBJ accession
  $result->{mtime}: last modified time

  $result->{anndata}: reference to an array of annotation data
  $result->{anndata}->[0..]->{qualifier}: qualifier of each annotation data
  $result->{anndata}->[0..]->{anntext}:   annotation text
  $result->{anndata}->[0..]->{datasrc}:   data source name
  $result->{anndata}->[0..]->{srckey}:    source key
  $result->{anndata}->[0..]->{evidence}:  evidence code

  if no annotation are found, undef will be returned.

=cut

sub get_entry {
    my $results = get_entries(@_);
    if (!defined $results) {
	return undef;
    }
    $results->[0];
}

sub get_entries {
    my $class = shift;
    my ($idref, $qualifiers, $idtype)  = @_;

    my $ua = new LWP::UserAgent;
    $ua->agent('MaXMLParser/0.1 ' . $ua->agent);

    my $url = $maxmlurl . '?style=xml';
    if (!defined $idtype) {
	$url .= '&masterid='.join(',', @$idref);
    } else {
	$url .= '&idtype=' . $idtype;
	$url .= '&id='.join(',', @$idref);
    }
    if (defined $qualifiers) {
	$url .= '&qualifier=' . join(',', @$qualifiers);
    }

    my $request = HTTP::Request->new('GET', $url);

    my $parser = new XML::Parser;
    $parser->setHandlers(Init=>\&newann_init,
			 Final=>\&newann_final,
			 Start=>\&newann_start,
			 End=>\&newann_end,
			 Char=>\&newann_char);

    sub newann_init {
	my ($expat) = @_;
	$expat->{my_inanns} = 0;
	$expat->{my_inread} = 1;
	$expat->{my_text}   = '';
	$expat->{my_annotation} = undef;
	$expat->{my_anndata}    = undef;
	$expat->{my_results}    = [];
	$expat->{my_result}     = {};
	$expat->{my_exist}      = 0;
    }

    sub newann_final {
	my ($expat) = @_;
	my ($results, $exist) = ($expat->{my_results}, $expat->{my_exist});
	($results, $exist);
    }

    sub newann_start {
	my ($expat, $name) = @_;

	$expat->{my_inread} = 0 if ($name eq 'histories');
	if ($expat->{my_inread}) {
	    if ($name eq 'annotations') {
		$expat->{my_inanns} = 1;
	    }
	}
    }

    sub newann_end {
	my ($expat, $name) = @_;

	if (!($expat->{my_inread})) {
	    $expat->{my_inread} = 1 if ($name eq 'histories');
	    $expat->{my_text} = '';
	    return;
	}
	my $text = $expat->{my_text};
	$expat->{my_text} = '';

	$text =~ s/^\s*//;
	$text =~ s/\s*$//;
	if ($expat->{my_inanns}) {
	    if ($name eq 'annotations') {
		$expat->{my_inanns} = 0;
	    } elsif ($name eq 'annotation') {
		&process_anndata($expat->{my_result}, $expat->{my_anndata});
		$expat->{my_anndata} = undef;
	    } else {
		$expat->{my_anndata}->{$name} = $text if ($text ne '');
	    }
	} else {
	    if ($name eq 'sequence') {
		&process_annotation($expat->{my_result}, $expat->{my_annotation});
		push(@{$expat->{my_results}}, $expat->{my_result});
		$expat->{my_result} = {};
		$expat->{my_exist} = 1;
	    } else {
		$name = 'mtime' if ($name eq 'modified_time');	
		$expat->{my_annotation}->{$name} = $text if ($text ne '');
	    }
	}
    }

    sub newann_char {
	my ($expat, $char) = @_;
	$expat->{my_text} .= $char;
    }

    sub process_annotation {
	my ($result, $data) = @_;
	foreach my $i (keys %$data) {
	    $$data{$i} =~ s/\\n/\n/g;
	    $$data{$i} = '' if ($$data{$i} =~ /^\s*$/);
	    $result->{$i} = $$data{$i};
	}
    }

    sub process_anndata {
	my ($result, $data) = @_;
	my $curanndata = {};
	foreach my $i (keys %$data) {
	    $$data{$i} =~ s/\\n/\n/g;
	    $$data{$i} = '' if ($$data{$i} =~ /^\s*$/);
	    $curanndata->{$i} = $$data{$i};
	}
	push(@{$result->{anndata}}, $curanndata);
    }

    $parsernb = $parser->parse_start();
    my $uares = $ua->request($request, \&parse);

    sub parse {
	my ($data, $response, $method) = @_;
	$parsernb->parse_more($data);
    }

    my ($results, $exist) = $parsernb->parse_done();

    undef $parsernb;

    if ($uares->is_error) {
	die 'Failed to get data';
    }

    if (!$exist) {
	return undef;
    }
    $results;
}

=head2 Set base URL

 MaXMLParser->set_base_url($url)

 Set BASE URL of viewer to get MaXML data

=cut

sub set_base_url {
    my $class = shift;
    my ($url) = @_;
    $url =~ s/\/$//;
    $url =~ s/\/maxmlseq.cgi$//;
    $url =~ s/\/maxml$//;
    $maxmlurl = $url . '/maxml/maxmlseq.cgi';
}

1;