[med-svn] [libbio-eutilities-perl] 01/02: New upstream version 1.75
Andreas Tille
tille at debian.org
Mon Jan 23 12:33:28 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository libbio-eutilities-perl.
commit b9f7964a06763fcf4289135938311192ebdaae83
Author: Andreas Tille <tille at debian.org>
Date: Mon Jan 23 13:31:07 2017 +0100
New upstream version 1.75
---
Changes | 30 +
LICENSE | 379 +++++
MANIFEST | 74 +
MANIFEST.SKIP | 1 +
META.json | 538 +++++++
META.yml | 400 +++++
Makefile.PL | 98 ++
bin/bp_einfo | 156 ++
bin/bp_genbank_ref_extractor | 1175 +++++++++++++++
dist.ini | 24 +
inc/TestHelper.pm | 64 +
lib/Bio/DB/EUtilities.pm | 1257 ++++++++++++++++
lib/Bio/Tools/EUtilities.pm | 1544 ++++++++++++++++++++
lib/Bio/Tools/EUtilities/EUtilDataI.pm | 216 +++
lib/Bio/Tools/EUtilities/EUtilParameters.pm | 700 +++++++++
lib/Bio/Tools/EUtilities/History.pm | 171 +++
lib/Bio/Tools/EUtilities/HistoryI.pm | 150 ++
lib/Bio/Tools/EUtilities/Info.pm | 231 +++
lib/Bio/Tools/EUtilities/Info/FieldInfo.pm | 292 ++++
lib/Bio/Tools/EUtilities/Info/LinkInfo.pm | 280 ++++
lib/Bio/Tools/EUtilities/Link.pm | 162 ++
lib/Bio/Tools/EUtilities/Link/LinkSet.pm | 571 ++++++++
lib/Bio/Tools/EUtilities/Link/UrlLink.pm | 256 ++++
lib/Bio/Tools/EUtilities/Query.pm | 192 +++
lib/Bio/Tools/EUtilities/Query/GlobalQuery.pm | 193 +++
lib/Bio/Tools/EUtilities/Summary.pm | 140 ++
lib/Bio/Tools/EUtilities/Summary/DocSum.pm | 280 ++++
lib/Bio/Tools/EUtilities/Summary/Item.pm | 526 +++++++
lib/Bio/Tools/EUtilities/Summary/ItemContainerI.pm | 238 +++
t/00-compile.t | 102 ++
t/EUtilParameters.t | 60 +
t/author-mojibake.t | 17 +
t/author-pod-syntax.t | 15 +
t/data/eutils/egquery.xml | 262 ++++
t/data/eutils/einfo.xml | 742 ++++++++++
t/data/eutils/einfo_dbs.xml | 43 +
t/data/eutils/elink_acheck.xml | 439 ++++++
t/data/eutils/elink_acheck_corr.xml | 454 ++++++
t/data/eutils/elink_dball.xml | 116 ++
t/data/eutils/elink_lcheck.xml | 13 +
t/data/eutils/elink_lcheck_corr.xml | 28 +
t/data/eutils/elink_llinks.xml | 61 +
t/data/eutils/elink_llinks_corr.xml | 76 +
t/data/eutils/elink_multidb.xml | 29 +
t/data/eutils/elink_multidb_corr.xml | 56 +
t/data/eutils/elink_ncheck.xml | 13 +
t/data/eutils/elink_ncheck_corr.xml | 28 +
t/data/eutils/elink_neighbor.xml | 42 +
t/data/eutils/elink_neighbor_corr.xml | 73 +
t/data/eutils/elink_nhist.xml | 24 +
t/data/eutils/elink_nhist_corr.xml | 62 +
t/data/eutils/elink_scores.xml | 66 +
t/data/eutils/epost.xml | 6 +
t/data/eutils/esearch1.xml | 59 +
t/data/eutils/esearch2.xml | 61 +
t/data/eutils/espell.xml | 9 +
t/data/eutils/esummary1.xml | 89 ++
t/data/eutils/esummary2.xml | 130 ++
t/egquery.t | 40 +
t/einfo.t | 85 ++
t/elink_acheck.t | 215 +++
t/elink_lcheck.t | 132 ++
t/elink_llinks.t | 154 ++
t/elink_ncheck.t | 124 ++
t/elink_neighbor.t | 129 ++
t/elink_neighbor_history.t | 134 ++
t/elink_scores.t | 106 ++
t/epost.t | 51 +
t/esearch.t | 91 ++
t/espell.t | 48 +
t/esummary.t | 187 +++
t/release-EUtilities.t | 458 ++++++
t/release-eol.t | 85 ++
73 files changed, 15552 insertions(+)
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..055cf5a
--- /dev/null
+++ b/Changes
@@ -0,0 +1,30 @@
+Revision history for Bio-EUtilities
+
+1.75 2016-09-25 23:06:59-05:00 America/Chicago
+
+1.74 2016-09-25 20:09:15-05:00 America/Chicago
+ * NCBI related change from HTTP to HTTPS
+ * Add preliminary 'idtype' attribute for some eutils, which will be
+ needed for upcoming NCBI changes with migration of GI to Accession.Version
+ * Remove usage of 'given' and 'when', which were downgraded to
+ experimental in perl 5.18.
+ * Fixed released tarball so it installs its programs `bp_einfo` and
+ `bp_genbank_ref_extractor`
+
+1.73 2013-09-08 22:17:18 America/Chicago
+ * LWP::UserAgent is required for Bio::DB::EUtilities, now added
+ * New script for download of all sequences from Entrez Gene queries
+
+1.72 2012-01-17 22:12:06 America/Chicago
+ * Add explicit prereqs
+ * Migrate all tests from Bio::Root::Test, add Test::More as
+ a build requirement, add helper module for test loading
+ * Remove Data::Dumper from modules
+
+1.71 2012-01-12 16:20:26 America/Chicago
+ * Full release (no changes from previous release)
+
+1.70 2012-01-12 14:38:27 America/Chicago
+ * Trial release
+ * Initial split from bioperl-live; history carried over [cjfields]
+ * No changes from last BioPerl release beyond splitting off.
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..dd55e09
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,379 @@
+This software is copyright (c) 2013 by Chris Fields.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+Terms of the Perl programming language system itself
+
+a) the GNU General Public License as published by the Free
+ Software Foundation; either version 1, or (at your option) any
+ later version, or
+b) the "Artistic License"
+
+--- The GNU General Public License, Version 1, February 1989 ---
+
+This software is Copyright (c) 2013 by Chris Fields.
+
+This is free software, licensed under:
+
+ The GNU General Public License, Version 1, February 1989
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 1, February 1989
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The license agreements of most software companies try to keep users
+at the mercy of those companies. By contrast, our General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. The
+General Public License applies to the Free Software Foundation's
+software and to any other program whose authors commit to using it.
+You can use it for your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Specifically, the General Public License is designed to make
+sure that you have the freedom to give away or sell copies of free
+software, that you receive source code or can get it if you want it,
+that you can change the software or use pieces of it in new free
+programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of a such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must tell them their rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any program or other work which
+contains a notice placed by the copyright holder saying it may be
+distributed under the terms of this General Public License. The
+"Program", below, refers to any such program or work, and a "work based
+on the Program" means either the Program or any work containing the
+Program or a portion of it, either verbatim or with modifications. Each
+licensee is addressed as "you".
+
+ 1. You may copy and distribute verbatim copies of the Program's source
+code as you receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice and
+disclaimer of warranty; keep intact all the notices that refer to this
+General Public License and to the absence of any warranty; and give any
+other recipients of the Program a copy of this General Public License
+along with the Program. You may charge a fee for the physical act of
+transferring a copy.
+
+ 2. You may modify your copy or copies of the Program or any portion of
+it, and copy and distribute such modifications under the terms of Paragraph
+1 above, provided that you also do the following:
+
+ a) cause the modified files to carry prominent notices stating that
+ you changed the files and the date of any change; and
+
+ b) cause the whole of any work that you distribute or publish, that
+ in whole or in part contains the Program or any part thereof, either
+ with or without modifications, to be licensed at no charge to all
+ third parties under the terms of this General Public License (except
+ that you may choose to grant warranty protection to some or all
+ third parties, at your option).
+
+ c) If the modified program normally reads commands interactively when
+ run, you must cause it, when started running for such interactive use
+ in the simplest and most usual way, to print or display an
+ announcement including an appropriate copyright notice and a notice
+ that there is no warranty (or else, saying that you provide a
+ warranty) and that users may redistribute the program under these
+ conditions, and telling the user how to view a copy of this General
+ Public License.
+
+ d) You may charge a fee for the physical act of transferring a
+ copy, and you may at your option offer warranty protection in
+ exchange for a fee.
+
+Mere aggregation of another independent work with the Program (or its
+derivative) on a volume of a storage or distribution medium does not bring
+the other work under the scope of these terms.
+
+ 3. You may copy and distribute the Program (or a portion or derivative of
+it, under Paragraph 2) in object code or executable form under the terms of
+Paragraphs 1 and 2 above provided that you also do one of the following:
+
+ a) accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ b) accompany it with a written offer, valid for at least three
+ years, to give any third party free (except for a nominal charge
+ for the cost of distribution) a complete machine-readable copy of the
+ corresponding source code, to be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ c) accompany it with the information you received as to where the
+ corresponding source code may be obtained. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form alone.)
+
+Source code for a work means the preferred form of the work for making
+modifications to it. For an executable file, complete source code means
+all the source code for all modules it contains; but, as a special
+exception, it need not include source code for modules which are standard
+libraries that accompany the operating system on which the executable
+file runs, or for standard header files or definitions files that
+accompany that operating system.
+
+ 4. You may not copy, modify, sublicense, distribute or transfer the
+Program except as expressly provided under this General Public License.
+Any attempt otherwise to copy, modify, sublicense, distribute or transfer
+the Program is void, and will automatically terminate your rights to use
+the Program under this License. However, parties who have received
+copies, or rights to use copies, from you under this General Public
+License will not have their licenses terminated so long as such parties
+remain in full compliance.
+
+ 5. By copying, distributing or modifying the Program (or any work based
+on the Program) you indicate your acceptance of this license to do so,
+and all its terms and conditions.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the original
+licensor to copy, distribute or modify the Program subject to these
+terms and conditions. You may not impose any further restrictions on the
+recipients' exercise of the rights granted herein.
+
+ 7. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of the license which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+the license, you may choose any version ever published by the Free Software
+Foundation.
+
+ 8. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to humanity, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these
+terms.
+
+ To do so, attach the following notices to the program. It is safest to
+attach them to the start of each source file to most effectively convey
+the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 1, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19xx name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the
+appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than `show w' and `show
+c'; they could even be mouse-clicks or menu items--whatever suits your
+program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ program `Gnomovision' (a program to direct compilers to make passes
+ at assemblers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
+--- The Artistic License 1.0 ---
+
+This software is Copyright (c) 2013 by Chris Fields.
+
+This is free software, licensed under:
+
+ The Artistic License 1.0
+
+The Artistic License
+
+Preamble
+
+The intent of this document is to state the conditions under which a Package
+may be copied, such that the Copyright Holder maintains some semblance of
+artistic control over the development of the package, while giving the users of
+the package the right to use and distribute the Package in a more-or-less
+customary fashion, plus the right to make reasonable modifications.
+
+Definitions:
+
+ - "Package" refers to the collection of files distributed by the Copyright
+ Holder, and derivatives of that collection of files created through
+ textual modification.
+ - "Standard Version" refers to such a Package if it has not been modified,
+ or has been modified in accordance with the wishes of the Copyright
+ Holder.
+ - "Copyright Holder" is whoever is named in the copyright or copyrights for
+ the package.
+ - "You" is you, if you're thinking about copying or distributing this Package.
+ - "Reasonable copying fee" is whatever you can justify on the basis of media
+ cost, duplication charges, time of people involved, and so on. (You will
+ not be required to justify it to the Copyright Holder, but only to the
+ computing community at large as a market that must bear the fee.)
+ - "Freely Available" means that no fee is charged for the item itself, though
+ there may be fees involved in handling the item. It also means that
+ recipients of the item may redistribute it under the same conditions they
+ received it.
+
+1. You may make and give away verbatim copies of the source form of the
+Standard Version of this Package without restriction, provided that you
+duplicate all of the original copyright notices and associated disclaimers.
+
+2. You may apply bug fixes, portability fixes and other modifications derived
+from the Public Domain or from the Copyright Holder. A Package modified in such
+a way shall still be considered the Standard Version.
+
+3. You may otherwise modify your copy of this Package in any way, provided that
+you insert a prominent notice in each changed file stating how and when you
+changed that file, and provided that you do at least ONE of the following:
+
+ a) place your modifications in the Public Domain or otherwise make them
+ Freely Available, such as by posting said modifications to Usenet or an
+ equivalent medium, or placing the modifications on a major archive site
+ such as ftp.uu.net, or by allowing the Copyright Holder to include your
+ modifications in the Standard Version of the Package.
+
+ b) use the modified Package only within your corporation or organization.
+
+ c) rename any non-standard executables so the names do not conflict with
+ standard executables, which must also be provided, and provide a separate
+ manual page for each non-standard executable that clearly documents how it
+ differs from the Standard Version.
+
+ d) make other distribution arrangements with the Copyright Holder.
+
+4. You may distribute the programs of this Package in object code or executable
+form, provided that you do at least ONE of the following:
+
+ a) distribute a Standard Version of the executables and library files,
+ together with instructions (in the manual page or equivalent) on where to
+ get the Standard Version.
+
+ b) accompany the distribution with the machine-readable source of the Package
+ with your modifications.
+
+ c) accompany any non-standard executables with their corresponding Standard
+ Version executables, giving the non-standard executables non-standard
+ names, and clearly documenting the differences in manual pages (or
+ equivalent), together with instructions on where to get the Standard
+ Version.
+
+ d) make other distribution arrangements with the Copyright Holder.
+
+5. You may charge a reasonable copying fee for any distribution of this
+Package. You may charge any fee you choose for support of this Package. You
+may not charge a fee for this Package itself. However, you may distribute this
+Package in aggregate with other (possibly commercial) programs as part of a
+larger (possibly commercial) software distribution provided that you do not
+advertise this Package as a product of your own.
+
+6. The scripts and library files supplied as input to or produced as output
+from the programs of this Package do not automatically fall under the copyright
+of this Package, but belong to whomever generated them, and may be sold
+commercially, and may be aggregated with this Package.
+
+7. C or perl subroutines supplied by you and linked into this Package shall not
+be considered part of this Package.
+
+8. The name of the Copyright Holder may not be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+The End
+
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..39bb7a3
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,74 @@
+# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.007.
+Changes
+LICENSE
+MANIFEST
+MANIFEST.SKIP
+META.json
+META.yml
+Makefile.PL
+bin/bp_einfo
+bin/bp_genbank_ref_extractor
+dist.ini
+inc/TestHelper.pm
+lib/Bio/DB/EUtilities.pm
+lib/Bio/Tools/EUtilities.pm
+lib/Bio/Tools/EUtilities/EUtilDataI.pm
+lib/Bio/Tools/EUtilities/EUtilParameters.pm
+lib/Bio/Tools/EUtilities/History.pm
+lib/Bio/Tools/EUtilities/HistoryI.pm
+lib/Bio/Tools/EUtilities/Info.pm
+lib/Bio/Tools/EUtilities/Info/FieldInfo.pm
+lib/Bio/Tools/EUtilities/Info/LinkInfo.pm
+lib/Bio/Tools/EUtilities/Link.pm
+lib/Bio/Tools/EUtilities/Link/LinkSet.pm
+lib/Bio/Tools/EUtilities/Link/UrlLink.pm
+lib/Bio/Tools/EUtilities/Query.pm
+lib/Bio/Tools/EUtilities/Query/GlobalQuery.pm
+lib/Bio/Tools/EUtilities/Summary.pm
+lib/Bio/Tools/EUtilities/Summary/DocSum.pm
+lib/Bio/Tools/EUtilities/Summary/Item.pm
+lib/Bio/Tools/EUtilities/Summary/ItemContainerI.pm
+t/00-compile.t
+t/EUtilParameters.t
+t/author-mojibake.t
+t/author-pod-syntax.t
+t/data/eutils/egquery.xml
+t/data/eutils/einfo.xml
+t/data/eutils/einfo_dbs.xml
+t/data/eutils/elink_acheck.xml
+t/data/eutils/elink_acheck_corr.xml
+t/data/eutils/elink_dball.xml
+t/data/eutils/elink_lcheck.xml
+t/data/eutils/elink_lcheck_corr.xml
+t/data/eutils/elink_llinks.xml
+t/data/eutils/elink_llinks_corr.xml
+t/data/eutils/elink_multidb.xml
+t/data/eutils/elink_multidb_corr.xml
+t/data/eutils/elink_ncheck.xml
+t/data/eutils/elink_ncheck_corr.xml
+t/data/eutils/elink_neighbor.xml
+t/data/eutils/elink_neighbor_corr.xml
+t/data/eutils/elink_nhist.xml
+t/data/eutils/elink_nhist_corr.xml
+t/data/eutils/elink_scores.xml
+t/data/eutils/epost.xml
+t/data/eutils/esearch1.xml
+t/data/eutils/esearch2.xml
+t/data/eutils/espell.xml
+t/data/eutils/esummary1.xml
+t/data/eutils/esummary2.xml
+t/egquery.t
+t/einfo.t
+t/elink_acheck.t
+t/elink_lcheck.t
+t/elink_llinks.t
+t/elink_ncheck.t
+t/elink_neighbor.t
+t/elink_neighbor_history.t
+t/elink_scores.t
+t/epost.t
+t/esearch.t
+t/espell.t
+t/esummary.t
+t/release-EUtilities.t
+t/release-eol.t
diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP
new file mode 100644
index 0000000..d301a05
--- /dev/null
+++ b/MANIFEST.SKIP
@@ -0,0 +1 @@
+.*\.komodoproject
diff --git a/META.json b/META.json
new file mode 100644
index 0000000..bfe5d7e
--- /dev/null
+++ b/META.json
@@ -0,0 +1,538 @@
+{
+ "abstract" : "Webagent which interacts with and retrieves data from NCBI's eUtils.",
+ "author" : [
+ "cjfields <cjfields at cpan.org>"
+ ],
+ "dynamic_config" : 0,
+ "generated_by" : "Dist::Zilla version 6.007, CPAN::Meta::Converter version 2.150001",
+ "license" : [
+ "perl_5"
+ ],
+ "meta-spec" : {
+ "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+ "version" : 2
+ },
+ "name" : "Bio-EUtilities",
+ "prereqs" : {
+ "configure" : {
+ "requires" : {
+ "ExtUtils::MakeMaker" : "0"
+ }
+ },
+ "develop" : {
+ "requires" : {
+ "Test::EOL" : "0",
+ "Test::Mojibake" : "0",
+ "Test::More" : "0.88",
+ "Test::Pod" : "1.41"
+ }
+ },
+ "runtime" : {
+ "requires" : {
+ "Bio::ASN1::EntrezGene" : "0",
+ "Bio::DB::GenericWebAgent" : "0",
+ "Bio::ParameterBaseI" : "0",
+ "Bio::Root::IO" : "0",
+ "Bio::Root::Root" : "0",
+ "Bio::Root::RootI" : "0",
+ "Bio::Root::Version" : "0",
+ "Bio::SeqIO" : "0",
+ "Cwd" : "0",
+ "Data::Dumper" : "0",
+ "File::Spec" : "0",
+ "Getopt::Long" : "0",
+ "HTTP::Request" : "0",
+ "LWP::UserAgent" : "0",
+ "Text::CSV" : "0",
+ "Text::Wrap" : "0",
+ "URI" : "0",
+ "XML::Simple" : "0",
+ "base" : "0",
+ "perl" : "5.010",
+ "strict" : "0",
+ "utf8" : "0",
+ "warnings" : "0"
+ }
+ },
+ "test" : {
+ "requires" : {
+ "File::Spec" : "0",
+ "IO::Handle" : "0",
+ "IPC::Open3" : "0",
+ "Test::More" : "0"
+ }
+ }
+ },
+ "release_status" : "stable",
+ "resources" : {
+ "bugtracker" : {
+ "mailto" : "bioperl-l at bioperl.org",
+ "web" : "https://github.com/bioperl/%%7Bdist%7D"
+ },
+ "homepage" : "https://metacpan.org/release/Bio-EUtilities",
+ "repository" : {
+ "type" : "git",
+ "url" : "git://github.com/bioperl/bio-eutilities.git",
+ "web" : "https://github.com/bioperl/bio-eutilities"
+ }
+ },
+ "version" : "1.75",
+ "x_Dist_Zilla" : {
+ "perl" : {
+ "version" : "5.022001"
+ },
+ "plugins" : [
+ {
+ "class" : "Dist::Zilla::Plugin::GatherDir",
+ "config" : {
+ "Dist::Zilla::Plugin::GatherDir" : {
+ "exclude_filename" : [],
+ "exclude_match" : [],
+ "follow_symlinks" : 0,
+ "include_dotfiles" : 0,
+ "prefix" : "",
+ "prune_directory" : [],
+ "root" : "."
+ }
+ },
+ "name" : "@Filter/@Filter/GatherDir",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::PruneCruft",
+ "name" : "@Filter/@Filter/PruneCruft",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::ManifestSkip",
+ "name" : "@Filter/@Filter/ManifestSkip",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::MetaYAML",
+ "name" : "@Filter/@Filter/MetaYAML",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::License",
+ "name" : "@Filter/@Filter/License",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::ExtraTests",
+ "name" : "@Filter/@Filter/ExtraTests",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::ExecDir",
+ "name" : "@Filter/@Filter/ExecDir",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::ShareDir",
+ "name" : "@Filter/@Filter/ShareDir",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::MakeMaker",
+ "config" : {
+ "Dist::Zilla::Role::TestRunner" : {
+ "default_jobs" : 1
+ }
+ },
+ "name" : "@Filter/@Filter/MakeMaker",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::Manifest",
+ "name" : "@Filter/@Filter/Manifest",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::TestRelease",
+ "name" : "@Filter/@Filter/TestRelease",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::ConfirmRelease",
+ "name" : "@Filter/@Filter/ConfirmRelease",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::UploadToCPAN",
+ "name" : "@Filter/@Filter/UploadToCPAN",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::MetaConfig",
+ "name" : "@Filter/MetaConfig",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::MetaJSON",
+ "name" : "@Filter/MetaJSON",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::PkgVersion",
+ "name" : "@Filter/PkgVersion",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::PodSyntaxTests",
+ "name" : "@Filter/PodSyntaxTests",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::NextRelease",
+ "name" : "@Filter/NextRelease",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::Test::Compile",
+ "config" : {
+ "Dist::Zilla::Plugin::Test::Compile" : {
+ "bail_out_on_fail" : "0",
+ "fail_on_warning" : "author",
+ "fake_home" : 0,
+ "filename" : "t/00-compile.t",
+ "module_finder" : [
+ ":InstallModules"
+ ],
+ "needs_display" : 0,
+ "phase" : "test",
+ "script_finder" : [
+ ":PerlExecFiles"
+ ],
+ "skips" : []
+ }
+ },
+ "name" : "@Filter/Test::Compile",
+ "version" : "2.054"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::MojibakeTests",
+ "name" : "@Filter/MojibakeTests",
+ "version" : "0.8"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::RunExtraTests",
+ "config" : {
+ "Dist::Zilla::Role::TestRunner" : {
+ "default_jobs" : 1
+ }
+ },
+ "name" : "@Filter/RunExtraTests",
+ "version" : "0.029"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::AutoMetaResources",
+ "name" : "@Filter/AutoMetaResources",
+ "version" : "1.21"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::MetaResources",
+ "name" : "@Filter/MetaResources",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::Authority",
+ "name" : "@Filter/Authority",
+ "version" : "1.009"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::EOLTests",
+ "config" : {
+ "Dist::Zilla::Plugin::Test::EOL" : {
+ "filename" : "xt/release/eol.t",
+ "finder" : [
+ ":ExecFiles",
+ ":InstallModules",
+ ":TestFiles"
+ ],
+ "trailing_whitespace" : 1,
+ "version" : "0.19"
+ }
+ },
+ "name" : "@Filter/EOLTests",
+ "version" : "0.19"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::PodWeaver",
+ "config" : {
+ "Dist::Zilla::Plugin::PodWeaver" : {
+ "config_plugins" : [
+ "@BioPerl"
+ ],
+ "finder" : [
+ ":InstallModules",
+ ":ExecFiles"
+ ],
+ "plugins" : [
+ {
+ "class" : "Pod::Weaver::Plugin::EnsurePod5",
+ "name" : "@CorePrep/EnsurePod5",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Plugin::H1Nester",
+ "name" : "@CorePrep/H1Nester",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Name",
+ "name" : "@BioPerl/Name",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Version",
+ "name" : "@BioPerl/Version",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Region",
+ "name" : "@BioPerl/prelude",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Generic",
+ "name" : "SYNOPSIS",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Generic",
+ "name" : "DESCRIPTION",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Generic",
+ "name" : "OVERVIEW",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Collect",
+ "name" : "ATTRIBUTES",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Collect",
+ "name" : "METHODS",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Collect",
+ "name" : "FUNCTIONS",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Collect",
+ "name" : "INTERNAL METHODS",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Leftovers",
+ "name" : "@BioPerl/Leftovers",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Region",
+ "name" : "@BioPerl/postlude",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Section::GenerateSection",
+ "name" : "FEEDBACK",
+ "version" : "1.02"
+ },
+ {
+ "class" : "Pod::Weaver::Section::GenerateSection",
+ "name" : "Mailing lists",
+ "version" : "1.02"
+ },
+ {
+ "class" : "Pod::Weaver::Section::GenerateSection",
+ "name" : "Support",
+ "version" : "1.02"
+ },
+ {
+ "class" : "Pod::Weaver::Section::GenerateSection",
+ "name" : "Reporting bugs",
+ "version" : "1.02"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Legal::Complicated",
+ "name" : "@BioPerl/Legal",
+ "version" : "1.21"
+ },
+ {
+ "class" : "Pod::Weaver::Section::Contributors",
+ "name" : "@BioPerl/Contributors",
+ "version" : "0.009"
+ },
+ {
+ "class" : "Pod::Weaver::Plugin::Encoding",
+ "name" : "Encoding",
+ "version" : "0.03"
+ },
+ {
+ "class" : "Pod::Weaver::Plugin::Transformer",
+ "name" : "@BioPerl/List",
+ "version" : "4.013"
+ },
+ {
+ "class" : "Pod::Weaver::Plugin::EnsureUniqueSections",
+ "name" : "EnsureUniqueSections",
+ "version" : "0.121550"
+ }
+ ]
+ }
+ },
+ "name" : "@Filter/PodWeaver",
+ "version" : "4.008"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::Git::Check",
+ "config" : {
+ "Dist::Zilla::Plugin::Git::Check" : {
+ "untracked_files" : "die"
+ },
+ "Dist::Zilla::Role::Git::DirtyFiles" : {
+ "allow_dirty" : [
+ "Changes",
+ "dist.ini"
+ ],
+ "allow_dirty_match" : [],
+ "changelog" : "Changes"
+ },
+ "Dist::Zilla::Role::Git::Repo" : {
+ "repo_root" : "."
+ }
+ },
+ "name" : "@Filter/Git::Check",
+ "version" : "2.039"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::Git::Commit",
+ "config" : {
+ "Dist::Zilla::Plugin::Git::Commit" : {
+ "add_files_in" : [],
+ "commit_msg" : "v%v%n%n%c"
+ },
+ "Dist::Zilla::Role::Git::DirtyFiles" : {
+ "allow_dirty" : [
+ "Changes",
+ "dist.ini"
+ ],
+ "allow_dirty_match" : [],
+ "changelog" : "Changes"
+ },
+ "Dist::Zilla::Role::Git::Repo" : {
+ "repo_root" : "."
+ },
+ "Dist::Zilla::Role::Git::StringFormatter" : {
+ "time_zone" : "local"
+ }
+ },
+ "name" : "@Filter/Git::Commit",
+ "version" : "2.039"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::Git::Tag",
+ "config" : {
+ "Dist::Zilla::Plugin::Git::Tag" : {
+ "branch" : null,
+ "changelog" : "Changes",
+ "signed" : 0,
+ "tag" : "Bio-EUtilities-v1.75",
+ "tag_format" : "%N-v%v",
+ "tag_message" : "%N-v%v"
+ },
+ "Dist::Zilla::Role::Git::Repo" : {
+ "repo_root" : "."
+ },
+ "Dist::Zilla::Role::Git::StringFormatter" : {
+ "time_zone" : "local"
+ }
+ },
+ "name" : "@Filter/Git::Tag",
+ "version" : "2.039"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::Prereqs",
+ "config" : {
+ "Dist::Zilla::Plugin::Prereqs" : {
+ "phase" : "runtime",
+ "type" : "requires"
+ }
+ },
+ "name" : "Requires",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::AutoPrereqs",
+ "name" : "AutoPrereqs",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::FinderCode",
+ "name" : ":InstallModules",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::FinderCode",
+ "name" : ":IncModules",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::FinderCode",
+ "name" : ":TestFiles",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::FinderCode",
+ "name" : ":ExtraTestFiles",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::FinderCode",
+ "name" : ":ExecFiles",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::FinderCode",
+ "name" : ":PerlExecFiles",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::FinderCode",
+ "name" : ":ShareFiles",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::FinderCode",
+ "name" : ":MainModule",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::FinderCode",
+ "name" : ":AllFiles",
+ "version" : "6.007"
+ },
+ {
+ "class" : "Dist::Zilla::Plugin::FinderCode",
+ "name" : ":NoFiles",
+ "version" : "6.007"
+ }
+ ],
+ "zilla" : {
+ "class" : "Dist::Zilla::Dist::Builder",
+ "config" : {
+ "is_trial" : "0"
+ },
+ "version" : "6.007"
+ }
+ },
+ "x_authority" : "cpan:BIOPERLML",
+ "x_serialization_backend" : "Cpanel::JSON::XS version 3.0217"
+}
+
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..e67c6a7
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,400 @@
+---
+abstract: "Webagent which interacts with and retrieves data from NCBI's eUtils."
+author:
+ - 'cjfields <cjfields at cpan.org>'
+build_requires:
+ File::Spec: '0'
+ IO::Handle: '0'
+ IPC::Open3: '0'
+ Test::More: '0'
+configure_requires:
+ ExtUtils::MakeMaker: '0'
+dynamic_config: 0
+generated_by: 'Dist::Zilla version 6.007, CPAN::Meta::Converter version 2.150001'
+license: perl
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: '1.4'
+name: Bio-EUtilities
+requires:
+ Bio::ASN1::EntrezGene: '0'
+ Bio::DB::GenericWebAgent: '0'
+ Bio::ParameterBaseI: '0'
+ Bio::Root::IO: '0'
+ Bio::Root::Root: '0'
+ Bio::Root::RootI: '0'
+ Bio::Root::Version: '0'
+ Bio::SeqIO: '0'
+ Cwd: '0'
+ Data::Dumper: '0'
+ File::Spec: '0'
+ Getopt::Long: '0'
+ HTTP::Request: '0'
+ LWP::UserAgent: '0'
+ Text::CSV: '0'
+ Text::Wrap: '0'
+ URI: '0'
+ XML::Simple: '0'
+ base: '0'
+ perl: '5.010'
+ strict: '0'
+ utf8: '0'
+ warnings: '0'
+resources:
+ bugtracker: https://github.com/bioperl/bio-eutilities/issues
+ homepage: https://metacpan.org/release/Bio-EUtilities
+ repository: git://github.com/bioperl/bio-eutilities.git
+version: '1.75'
+x_Dist_Zilla:
+ perl:
+ version: '5.022001'
+ plugins:
+ -
+ class: Dist::Zilla::Plugin::GatherDir
+ config:
+ Dist::Zilla::Plugin::GatherDir:
+ exclude_filename: []
+ exclude_match: []
+ follow_symlinks: 0
+ include_dotfiles: 0
+ prefix: ''
+ prune_directory: []
+ root: .
+ name: '@Filter/@Filter/GatherDir'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::PruneCruft
+ name: '@Filter/@Filter/PruneCruft'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::ManifestSkip
+ name: '@Filter/@Filter/ManifestSkip'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::MetaYAML
+ name: '@Filter/@Filter/MetaYAML'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::License
+ name: '@Filter/@Filter/License'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::ExtraTests
+ name: '@Filter/@Filter/ExtraTests'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::ExecDir
+ name: '@Filter/@Filter/ExecDir'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::ShareDir
+ name: '@Filter/@Filter/ShareDir'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::MakeMaker
+ config:
+ Dist::Zilla::Role::TestRunner:
+ default_jobs: 1
+ name: '@Filter/@Filter/MakeMaker'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::Manifest
+ name: '@Filter/@Filter/Manifest'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::TestRelease
+ name: '@Filter/@Filter/TestRelease'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::ConfirmRelease
+ name: '@Filter/@Filter/ConfirmRelease'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::UploadToCPAN
+ name: '@Filter/@Filter/UploadToCPAN'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::MetaConfig
+ name: '@Filter/MetaConfig'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::MetaJSON
+ name: '@Filter/MetaJSON'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::PkgVersion
+ name: '@Filter/PkgVersion'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::PodSyntaxTests
+ name: '@Filter/PodSyntaxTests'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::NextRelease
+ name: '@Filter/NextRelease'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::Test::Compile
+ config:
+ Dist::Zilla::Plugin::Test::Compile:
+ bail_out_on_fail: '0'
+ fail_on_warning: author
+ fake_home: 0
+ filename: t/00-compile.t
+ module_finder:
+ - ':InstallModules'
+ needs_display: 0
+ phase: test
+ script_finder:
+ - ':PerlExecFiles'
+ skips: []
+ name: '@Filter/Test::Compile'
+ version: '2.054'
+ -
+ class: Dist::Zilla::Plugin::MojibakeTests
+ name: '@Filter/MojibakeTests'
+ version: '0.8'
+ -
+ class: Dist::Zilla::Plugin::RunExtraTests
+ config:
+ Dist::Zilla::Role::TestRunner:
+ default_jobs: 1
+ name: '@Filter/RunExtraTests'
+ version: '0.029'
+ -
+ class: Dist::Zilla::Plugin::AutoMetaResources
+ name: '@Filter/AutoMetaResources'
+ version: '1.21'
+ -
+ class: Dist::Zilla::Plugin::MetaResources
+ name: '@Filter/MetaResources'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::Authority
+ name: '@Filter/Authority'
+ version: '1.009'
+ -
+ class: Dist::Zilla::Plugin::EOLTests
+ config:
+ Dist::Zilla::Plugin::Test::EOL:
+ filename: xt/release/eol.t
+ finder:
+ - ':ExecFiles'
+ - ':InstallModules'
+ - ':TestFiles'
+ trailing_whitespace: 1
+ version: '0.19'
+ name: '@Filter/EOLTests'
+ version: '0.19'
+ -
+ class: Dist::Zilla::Plugin::PodWeaver
+ config:
+ Dist::Zilla::Plugin::PodWeaver:
+ config_plugins:
+ - '@BioPerl'
+ finder:
+ - ':InstallModules'
+ - ':ExecFiles'
+ plugins:
+ -
+ class: Pod::Weaver::Plugin::EnsurePod5
+ name: '@CorePrep/EnsurePod5'
+ version: '4.013'
+ -
+ class: Pod::Weaver::Plugin::H1Nester
+ name: '@CorePrep/H1Nester'
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Name
+ name: '@BioPerl/Name'
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Version
+ name: '@BioPerl/Version'
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Region
+ name: '@BioPerl/prelude'
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Generic
+ name: SYNOPSIS
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Generic
+ name: DESCRIPTION
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Generic
+ name: OVERVIEW
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Collect
+ name: ATTRIBUTES
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Collect
+ name: METHODS
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Collect
+ name: FUNCTIONS
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Collect
+ name: 'INTERNAL METHODS'
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Leftovers
+ name: '@BioPerl/Leftovers'
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::Region
+ name: '@BioPerl/postlude'
+ version: '4.013'
+ -
+ class: Pod::Weaver::Section::GenerateSection
+ name: FEEDBACK
+ version: '1.02'
+ -
+ class: Pod::Weaver::Section::GenerateSection
+ name: 'Mailing lists'
+ version: '1.02'
+ -
+ class: Pod::Weaver::Section::GenerateSection
+ name: Support
+ version: '1.02'
+ -
+ class: Pod::Weaver::Section::GenerateSection
+ name: 'Reporting bugs'
+ version: '1.02'
+ -
+ class: Pod::Weaver::Section::Legal::Complicated
+ name: '@BioPerl/Legal'
+ version: '1.21'
+ -
+ class: Pod::Weaver::Section::Contributors
+ name: '@BioPerl/Contributors'
+ version: '0.009'
+ -
+ class: Pod::Weaver::Plugin::Encoding
+ name: Encoding
+ version: '0.03'
+ -
+ class: Pod::Weaver::Plugin::Transformer
+ name: '@BioPerl/List'
+ version: '4.013'
+ -
+ class: Pod::Weaver::Plugin::EnsureUniqueSections
+ name: EnsureUniqueSections
+ version: '0.121550'
+ name: '@Filter/PodWeaver'
+ version: '4.008'
+ -
+ class: Dist::Zilla::Plugin::Git::Check
+ config:
+ Dist::Zilla::Plugin::Git::Check:
+ untracked_files: die
+ Dist::Zilla::Role::Git::DirtyFiles:
+ allow_dirty:
+ - Changes
+ - dist.ini
+ allow_dirty_match: []
+ changelog: Changes
+ Dist::Zilla::Role::Git::Repo:
+ repo_root: .
+ name: '@Filter/Git::Check'
+ version: '2.039'
+ -
+ class: Dist::Zilla::Plugin::Git::Commit
+ config:
+ Dist::Zilla::Plugin::Git::Commit:
+ add_files_in: []
+ commit_msg: v%v%n%n%c
+ Dist::Zilla::Role::Git::DirtyFiles:
+ allow_dirty:
+ - Changes
+ - dist.ini
+ allow_dirty_match: []
+ changelog: Changes
+ Dist::Zilla::Role::Git::Repo:
+ repo_root: .
+ Dist::Zilla::Role::Git::StringFormatter:
+ time_zone: local
+ name: '@Filter/Git::Commit'
+ version: '2.039'
+ -
+ class: Dist::Zilla::Plugin::Git::Tag
+ config:
+ Dist::Zilla::Plugin::Git::Tag:
+ branch: ~
+ changelog: Changes
+ signed: 0
+ tag: Bio-EUtilities-v1.75
+ tag_format: '%N-v%v'
+ tag_message: '%N-v%v'
+ Dist::Zilla::Role::Git::Repo:
+ repo_root: .
+ Dist::Zilla::Role::Git::StringFormatter:
+ time_zone: local
+ name: '@Filter/Git::Tag'
+ version: '2.039'
+ -
+ class: Dist::Zilla::Plugin::Prereqs
+ config:
+ Dist::Zilla::Plugin::Prereqs:
+ phase: runtime
+ type: requires
+ name: Requires
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::AutoPrereqs
+ name: AutoPrereqs
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::FinderCode
+ name: ':InstallModules'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::FinderCode
+ name: ':IncModules'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::FinderCode
+ name: ':TestFiles'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::FinderCode
+ name: ':ExtraTestFiles'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::FinderCode
+ name: ':ExecFiles'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::FinderCode
+ name: ':PerlExecFiles'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::FinderCode
+ name: ':ShareFiles'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::FinderCode
+ name: ':MainModule'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::FinderCode
+ name: ':AllFiles'
+ version: '6.007'
+ -
+ class: Dist::Zilla::Plugin::FinderCode
+ name: ':NoFiles'
+ version: '6.007'
+ zilla:
+ class: Dist::Zilla::Dist::Builder
+ config:
+ is_trial: '0'
+ version: '6.007'
+x_authority: cpan:BIOPERLML
+x_serialization_backend: 'YAML::Tiny version 1.69'
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..d1dbca0
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,98 @@
+# This file was automatically generated by Dist::Zilla::Plugin::MakeMaker v6.007.
+use strict;
+use warnings;
+
+use 5.010;
+
+use ExtUtils::MakeMaker;
+# Arguments handed to WriteMakefile() at the end of this script (possibly trimmed first for old ExtUtils::MakeMaker releases).
+my %WriteMakefileArgs = (
+ "ABSTRACT" => "Webagent which interacts with and retrieves data from NCBI's eUtils.",
+ "AUTHOR" => "cjfields <cjfields\@cpan.org>",
+ "CONFIGURE_REQUIRES" => { # needed to run this Makefile.PL itself
+ "ExtUtils::MakeMaker" => 0
+ },
+ "DISTNAME" => "Bio-EUtilities",
+ "EXE_FILES" => [ # scripts shipped with the distribution
+ "bin/bp_einfo",
+ "bin/bp_genbank_ref_extractor"
+ ],
+ "LICENSE" => "perl",
+ "MIN_PERL_VERSION" => "5.010",
+ "NAME" => "Bio::EUtilities",
+ "PREREQ_PM" => { # runtime prerequisites; a version of 0 accepts any release
+ "Bio::ASN1::EntrezGene" => 0,
+ "Bio::DB::GenericWebAgent" => 0,
+ "Bio::ParameterBaseI" => 0,
+ "Bio::Root::IO" => 0,
+ "Bio::Root::Root" => 0,
+ "Bio::Root::RootI" => 0,
+ "Bio::Root::Version" => 0,
+ "Bio::SeqIO" => 0,
+ "Cwd" => 0,
+ "Data::Dumper" => 0,
+ "File::Spec" => 0,
+ "Getopt::Long" => 0,
+ "HTTP::Request" => 0,
+ "LWP::UserAgent" => 0,
+ "Text::CSV" => 0,
+ "Text::Wrap" => 0,
+ "URI" => 0,
+ "XML::Simple" => 0,
+ "base" => 0,
+ "strict" => 0,
+ "utf8" => 0,
+ "warnings" => 0
+ },
+ "TEST_REQUIRES" => { # prerequisites of the test suite only
+ "File::Spec" => 0,
+ "IO::Handle" => 0,
+ "IPC::Open3" => 0,
+ "Test::More" => 0
+ },
+ "VERSION" => "1.75",
+ "test" => {
+ "TESTS" => "t/*.t" # test files run by "make test"
+ }
+);
+
+# Union of the PREREQ_PM and TEST_REQUIRES entries above; it replaces PREREQ_PM when ExtUtils::MakeMaker is too old (see the version check below).
+my %FallbackPrereqs = (
+ "Bio::ASN1::EntrezGene" => 0,
+ "Bio::DB::GenericWebAgent" => 0,
+ "Bio::ParameterBaseI" => 0,
+ "Bio::Root::IO" => 0,
+ "Bio::Root::Root" => 0,
+ "Bio::Root::RootI" => 0,
+ "Bio::Root::Version" => 0,
+ "Bio::SeqIO" => 0,
+ "Cwd" => 0,
+ "Data::Dumper" => 0,
+ "File::Spec" => 0,
+ "Getopt::Long" => 0,
+ "HTTP::Request" => 0,
+ "IO::Handle" => 0,
+ "IPC::Open3" => 0,
+ "LWP::UserAgent" => 0,
+ "Test::More" => 0,
+ "Text::CSV" => 0,
+ "Text::Wrap" => 0,
+ "URI" => 0,
+ "XML::Simple" => 0,
+ "base" => 0,
+ "strict" => 0,
+ "utf8" => 0,
+ "warnings" => 0
+);
+
+# Degrade gracefully on old ExtUtils::MakeMaker releases: ->VERSION(N) dies when the installed version is lower than N, which the eval turns into a false value.
+unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) { # installed release is older than 6.63_03
+ delete $WriteMakefileArgs{TEST_REQUIRES};
+ delete $WriteMakefileArgs{BUILD_REQUIRES};
+ $WriteMakefileArgs{PREREQ_PM} = \%FallbackPrereqs; # test prerequisites are folded into the runtime list instead
+}
+# CONFIGURE_REQUIRES is likewise removed when ExtUtils::MakeMaker is older than 6.52.
+delete $WriteMakefileArgs{CONFIGURE_REQUIRES}
+ unless eval { ExtUtils::MakeMaker->VERSION(6.52) };
+# Write the Makefile from whatever arguments are left.
+WriteMakefile(%WriteMakefileArgs);
diff --git a/bin/bp_einfo b/bin/bp_einfo
new file mode 100644
index 0000000..5167eff
--- /dev/null
+++ b/bin/bp_einfo
@@ -0,0 +1,156 @@
+#!perl
+
+use strict;
+use warnings;
+
+# PODNAME: bp_einfo
+# ABSTRACT: Query einfo to find all available databases or information about a specific database (field information or links to other NCBI databases).
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2009-2012 Chris Fields
+# LICENSE: Perl_5
+
+
+use Getopt::Long;
+use Bio::DB::EUtilities;
+
+# Option targets: database to describe, field codes and link names to filter
+# on, an optional output file, and the email address passed on to NCBI.
+my ($db, @fields, @links, $outfile, $email);
+GetOptions(
+           'e|email:s'          => \$email,
+           'd|db|database:s'    => \$db,
+           'f|field:s'          => \@fields,
+           'l|link:s'           => \@links,
+           'o|out|outfile:s'    => \$outfile,
+           'h|help'             => sub { exec('perldoc',$0); exit; }
+           );
+
+# All output goes to $outfh: the requested outfile, or STDOUT by default.
+my $outfh;
+if( $outfile ) {
+    # three-argument open: characters in the file name can never be taken as
+    # part of the open mode
+    open($outfh, '>', $outfile) || die("$outfile: $!");
+} else {
+    $outfh = \*STDOUT;
+}
+
+if (!defined $db) {
+    # no database given: just list every database einfo reports
+    my $eutil = Bio::DB::EUtilities->new(-eutil => 'einfo',
+                                         -email => $email);
+    print $outfh join("\n",$eutil->get_available_databases);
+} else {
+    my $eutil = Bio::DB::EUtilities->new(-eutil => 'einfo',
+                                         -db    => $db,
+                                         -email => $email);
+    if (@links || @fields) {
+        # specific items requested: print only the matching fields and links
+        for my $fi ($eutil->get_FieldInfo) {
+            my $code = $fi->get_field_code;
+            if (grep {$_ eq $code} @fields) {
+                print $outfh $fi->to_string."\n";
+            }
+        }
+        for my $li ($eutil->get_LinkInfo) {
+            my $nm = $li->get_link_name;
+            if (grep {$_ eq $nm} @links) {
+                print $outfh $li->to_string."\n";
+            }
+        }
+    } else {
+        # nothing specific requested: print everything. The handle is passed
+        # explicitly so that -o is honoured here too (without -fh these
+        # methods print to STDOUT).
+        $eutil->print_FieldInfo(-fh => $outfh);
+        $eutil->print_LinkInfo(-fh => $outfh);
+    }
+}
+
+# Errors on a buffered write handle may only surface at close, so check it.
+if( $outfile ) {
+    close($outfh) || die("$outfile: $!");
+}
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+bp_einfo - Query einfo to find all available databases or information about a specific database (field information or links to other NCBI databases).
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ bp_einfo [-e email] [-d database] [-f Field Code] [-l Link Name] [-o outfile]
+
+=head1 DESCRIPTION
+
+Command line options:
+
+ -e/--email
+ Valid email (required by NCBI policy)
+
+ -d/--db/--database
+ NCBI database to query
+ (default = none, which shows available databases)
+
+ -f/--field
+ print out information about a specific field code
+ (default = none)
+
+ -l/--link
+ print out information about a specific link name
+ (default = none)
+
+ -o/--out
+ outfile
+ (default = STDOUT)
+
+ -h/--help
+ show this documentation
+
+As per NCBI's policy regarding eutils access, a valid email is required. This
+is not enforced here (if one is not provided you will get a standard warning), but
+don't be surprised if this doesn't work after June 1, 2010 unless one is
+supplied.
+
+If -d is not specified, field and link arguments are ignored and all available
+databases are printed instead.
+
+If either link names or field codes (or both) are specified, nothing else is
+printed out (only the info requested). You can specify as many fields and/or
+links as you want by using multiple -f/-l E<lt>ARGE<gt> on the command line.
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2009-2012 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/bin/bp_genbank_ref_extractor b/bin/bp_genbank_ref_extractor
new file mode 100755
index 0000000..7bdcdcd
--- /dev/null
+++ b/bin/bp_genbank_ref_extractor
@@ -0,0 +1,1175 @@
+#!/usr/bin/perl
+use utf8;
+## Copyright (C) 2011-2015 Carnë Draug <carandraug+dev at gmail.com>
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+use 5.010; # Use Perl 5.10
+use warnings; # Replacement for the -w flag, but lexically scoped
+use strict; # Enforce some good programming rules
+use Getopt::Long; # Parse program arguments
+use Cwd; # Determines current working directory
+use File::Spec; # Perform operation on file names
+use Bio::SeqIO; # Handler for SeqIO Formats
+use Bio::DB::EUtilities; # Retrieve entries from Entrez
+use Bio::Root::Version; # Get global bioperl-live version number
+
+# PODNAME: bp_genbank_ref_extractor
+# ABSTRACT: Retrieves all related sequences for a list of searches on Entrez gene
+# AUTHOR: Carnë Draug <carandraug+dev at gmail.com>
+# OWNER: 2011-2015 Carnë Draug
+# LICENSE: GPL_3
+
+## --assembly: regex matched case-insensitively against each locus heading to select the assembly of interest
+my $assembly_regex = '(primary|reference) assembly';
+## --debug: raises the verbosity level to 9 and dumps the collected data structure at the end of the run
+my $debug = 0;
+## --down/--downstream: integer option, 0 by default (consumed outside this part of the file)
+my $downstream = 0;
+## --email: address handed to Bio::DB::EUtilities as -email when querying Entrez
+my $email = "";
+## --format: string option; presumably the sequence format used when writing files -- TODO confirm against its use further down
+my $format = 'genbank';
+
+## Identifier selected with --genes. It stays '' while the option is absent;
+## a true value makes the main flow fetch the gene sequences.
+my $genes = '';
+## Getopt::Long callback for --genes. Called with (option name, value) and
+## maps the value, case-insensitively, onto 'uid', 'symbol' or 'name'.
+## An empty value (option given without argument) selects 'uid'; any other
+## value is fatal.
+sub genes_option_parsing {
+    my $choice = $_[1];
+    $genes = $choice eq ''              ? 'uid'
+           : $choice =~ /^(u)?id$/i     ? 'uid'
+           : $choice =~ /^sym(bol)?$/i  ? 'symbol'
+           : $choice =~ /^name$/i       ? 'name'
+           : die "Invalid identifier '$_[1]' for gene files.";
+}
+## --limit: maximum number of gene UIDs requested per Entrez gene search (sent as -retmax)
+my $limit = 200;
+## --non-coding / --no-non-coding: negatable flag, on by default (consumed outside this part of the file)
+my $get_noncoding = 1;
+
+## Identifier selected with --proteins. It stays '' while the option is
+## absent; a true value makes the main flow fetch the protein sequences.
+my $proteins = '';
+## Getopt::Long callback for --proteins. Called with (option name, value) and
+## maps the value, case-insensitively, onto 'accession', 'description',
+## 'gene' or 'transcript'. Any unrecognised non-empty value is fatal.
+sub proteins_option_parsing {
+    my $choice = $_[1];
+
+    ## default is set here, when value is empty
+    if ($choice eq '') {
+        $proteins = 'accession';
+        return;
+    }
+
+    ## accepted spellings, each paired with the identifier it stands for
+    my @known = (
+        [ qr/^acc(ession)?$/i,       'accession'   ],
+        [ qr/^desc(ription)?$/i,     'description' ],
+        [ qr/^gene$/i,               'gene'        ],
+        [ qr/^(transcript|mrna)$/i,  'transcript'  ],
+    );
+    for my $rule (@known) {
+        my ($pattern, $identifier) = @{$rule};
+        next unless $choice =~ $pattern;
+        $proteins = $identifier;
+        return;
+    }
+
+    die "Invalid identifier '$_[1]' for protein files.";
+}
+## --pseudo / --no-pseudo: negatable flag, on by default; when off, genes of type 'pseudo' are removed during the analysis
+my $get_pseudo = 1;
+## --save: base directory that receives the genes/, transcripts/ and proteins/ subdirectories and the log file
+my $save = File::Spec->catfile (getcwd, 'extracted sequences');
+
+## Format selected with --save-data. It stays '' while the option is absent;
+## a true value makes the main flow save the collected data structure.
+my $save_data = '';
+## Getopt::Long callback for --save-data. Called with (option name, value).
+## 'csv' (any case) or an empty value (option given without argument) selects
+## csv and loads Text::CSV on demand; any other value is fatal.
+sub save_data_option_parsing {
+    my $requested = $_[1];
+    if ($requested eq '' || $requested =~ /^csv$/i) {
+        $save_data = 'csv';
+        require Text::CSV;
+    } else {
+        ## report the value the user actually typed: $save_data is still
+        ## empty at this point, so it is useless in the message
+        die "Specified format to save data '$requested' is not valid.";
+    }
+}
+
+## Identifier selected with --transcripts/--mrna. It stays '' while the option
+## is absent; a true value makes the main flow fetch the transcript sequences.
+my $transcripts = '';
+## Getopt::Long callback for --transcripts/--mrna. Called with (option name,
+## value) and maps the value, case-insensitively, onto 'accession',
+## 'description', 'gene' or 'protein'. An empty value (option given without
+## argument) selects 'accession'; any other value is fatal.
+sub transcripts_option_parsing {
+    my $choice = $_[1];
+    return $transcripts = 'accession'   if $choice eq '' || $choice =~ /^acc(ession)?$/i;
+    return $transcripts = 'description' if $choice =~ /^desc(ription)?$/i;
+    return $transcripts = 'gene'        if $choice =~ /^gene$/i;
+    return $transcripts = 'protein'     if $choice =~ /^protein$/i;
+    die "Invalid identifier '$_[1]' for transcript files.";
+}
+## --up/--upstream: integer option, 0 by default (consumed outside this part of the file)
+my $upstream = 0;
+## --verbose/-v: raises the verbosity level to 2
+my $verbose = '';
+## --very-verbose/--vv: raises the verbosity level to 3
+my $very_verbose = '';
+
+
+################################################################################
+## Parse options, check and create files and directories needed
+################################################################################
+## Option table: '=s'/'=i' need a value, ':s' makes the value optional ('' when omitted), '!' makes a flag negatable with --no-.
+GetOptions(
+ 'assembly:s' => \$assembly_regex,
+ 'debug' => \$debug,
+ 'down|downstream=i' => \$downstream,
+ 'email=s' => \$email,
+ 'format=s' => \$format,
+ 'genes:s' => \&genes_option_parsing, ## callbacks receive (option name, value)
+ 'limit=i' => \$limit,
+ 'non-coding!' => \$get_noncoding,
+ 'proteins:s' => \&proteins_option_parsing,
+ 'pseudo!' => \$get_pseudo,
+ 'save=s' => \$save,
+ 'save-data:s' => \&save_data_option_parsing,
+ 'transcripts|mrna:s' => \&transcripts_option_parsing,
+ 'up|upstream=i' => \$upstream,
+ 'verbose|v' => \$verbose,
+ 'very-verbose|vv' => \$very_verbose,
+ 'h|help' => sub { exec('perldoc', $0); exit; } ## show this script's POD through perldoc
+ ) or die "Error processing options";
+## It is necessary to check success of GetOptions since:
+## ''GetOptions returns true to indicate success. It returns false when the function
+## detected one or more errors during option parsing. These errors are signalled
+## using warn() and can be trapped with $SIG{__WARN__}''
+
+## set verbosity level
+## The most talkative flag given wins: --debug (9), then --very-verbose (3),
+## then --verbose (2); without any of them the level is 1.
+my $verbosity = $debug        ? 9
+              : $very_verbose ? 3
+              : $verbose      ? 2
+              :                 1;
+
+## Output layout below $save: one subdirectory per kind of sequence, plus the
+## log file of this run.
+my ($gene_dir, $mrna_dir, $prot_dir) =
+    map { File::Spec->catfile ($save, $_) } qw(genes transcripts proteins);
+check_dir($_) foreach ($save, $gene_dir, $mrna_dir, $prot_dir);
+
+my $log_file = File::Spec->catfile ($save, 'extractor.log');
+## LOG is a package-level handle opened once for the whole run (presumably
+## the handle log_it() writes to -- confirm against its definition)
+open (LOG, ">", $log_file) or die "Couldn't open file $log_file for writing: $!";
+
+log_it (1, "This is bp_genbank_ref_extractor on Bioperl $Bio::Root::Version::VERSION on ". &get_time);
+
+################################################################################
+## Everything is ready. Start accessing the database
+################################################################################
+
+## Container that the analysis fills in (through add_gene() and friends).
+my $data = Structure->new;
+
+say "Searching on Entrez gene...";
+## every command-line argument is a separate search; pool the UIDs of all of them
+my @uids = map { gb_search ($_) } @ARGV;
+{
+    ## drop UIDs returned by more than one search and report how many went
+    my $before = @uids;
+    clean_array(\@uids);
+    my $diff = $before - @uids;
+    if ($diff > 0) {
+        log_it (2, "Entrez gene: removed $diff UIDs from the search results for being repeated.");
+    }
+    log_it (3, "Entrez gene: list of retrieved IDs is: @uids");
+}
+
+say "Fetching gene info...";
+analyze_entrez_genes ($data, \@uids);
+
+## sequences are only fetched for the kinds requested on the command line
+if ($genes) {
+    say "Fetching gene sequences...";
+    get_genes($data);
+}
+if ($transcripts) {
+    say "Fetching transcript sequences...";
+    get_products('transcript', $data);
+}
+if ($proteins) {
+    say "Fetching protein sequences...";
+    get_products('protein', $data);
+}
+
+if ($save_data) {
+    save_structure($data);
+}
+if ($debug) {
+    use Data::Dumper;
+    print Dumper $data;
+}
+exit;
+
+################################################################################
+## genbank search subs
+################################################################################
+
+## Runs a single search on Entrez gene and returns the matching gene UIDs.
+## The only argument is the search term. At most $limit UIDs are requested;
+## when the search matched more than that, a message is logged and also
+## emitted as a warning.
+sub gb_search {
+    my ($query) = @_;
+    log_it (2, "Entrez gene: searching with '$query'");
+
+    my @search_args = (
+        -eutil   => 'esearch',
+        -db      => 'gene',
+        -term    => $query,
+        -retmax  => $limit,
+        -email   => $email,
+    );
+    my $searcher = Bio::DB::EUtilities->new(@search_args);
+
+    my $translation = $searcher->get_query_translation;
+    log_it (3, "Entrez gene: query $query translated into '$translation'");
+    log_it (2, sprintf("Entrez gene: found %s UIDS", $searcher->get_count));
+
+    unless ($searcher->get_count <= $limit) {
+        ## more hits than we asked for: say so in the log and on STDERR
+        my $w_message = "Entrez gene: search returned more ids than the set limit of $limit. Retrieving only the first $limit genes.";
+        log_it (2, $w_message);
+        warn $w_message;
+    }
+    return $searcher->get_ids;
+}
+
+## we are not using esummary because it doesn't tell us the products of the gene thus forcing us
+## to download the sequence and analyze it from that. We are also not using elink because it fails
+## too frequently. Also, entrezgene gives currently the most information so it'll be easier to implement
+## new stuff at a later time
+sub analyze_entrez_genes {
+ my $struct = shift;
+ my $uid_ref = shift; # a reference for the array containing the list of gene UID
+
+ ## TODO may be a good idea to limit this and download only a few sequences rather
+ ## than what can possibly be thousands of them.
+ my $fetcher = Bio::DB::EUtilities->new(
+ -eutil => 'efetch',
+ -db => 'gene',
+ -id => $uid_ref,
+ -retmode => 'text',
+ -rettype => 'asn1',
+ -email => $email,
+ );
+ my $response = $fetcher->get_Response->content;
+ open(my $seq_fh, "<", \$response) or die "Could not open sequences string for reading: $!";
+
+ ## TODO when this bug is fixed https://redmine.open-bio.org/issues/3261
+ ## this should be fixed to use Bio::SeqIO with format=> 'entrezgene'
+ ## then we could use methods to access the data
+ use Bio::ASN1::EntrezGene;
+
+ my $parser = Bio::ASN1::EntrezGene->new(
+ -fh => $seq_fh,
+ );
+
+ SEQ: while(my $result = $parser->next_seq){
+ ## it's possible that when analyzing genes, if a gene has a RefSeq status of secondary, it will point
+ ## to another gene, also in the list. To prevent analyzing the same gene twice here, this hash keeps
+ ## track of the analyzed gene UIDs (even the ones who are pseudo and may not be on $struct)
+ state %analyzed_genes;
+ $result = $result->[0] if(ref($result) eq 'ARRAY');
+ ## Data::Dumper can be used to look into the structure and find where things are
+# use Data::Dumper;
+# print Dumper ($result);
+# exit;
+
+ my $uid = find_in_entrezgene ($result, ['track-info', 'geneid']);
+ if ($analyzed_genes{$uid}) {
+ log_it (9, "DEBUG: skipping analysis of gene with UID='$uid' since it's already been done.");
+ next SEQ;
+ }
+ $analyzed_genes{$uid} = 1;
+
+ my ($symbol, $name);
+ foreach my $p (@{$result->{'properties'}}){
+ $p = $p->[0] if(ref($p) eq 'ARRAY');
+ next unless ($p->{'label'} && $p->{'label'} eq 'Nomenclature');
+ foreach my $pp (@{$p->{'properties'}}){
+ $pp = $pp->[0] if(ref($pp) eq 'ARRAY');
+ $name = $pp->{'text'} if ($pp->{'label'} && $pp->{'label'} eq 'Official Full Name');
+ $symbol = $pp->{'text'} if ($pp->{'label'} && $pp->{'label'} eq 'Official Symbol');
+ }
+ }
+
+ ## if couldn't find the name and symbol on 'properties', try 'gene'
+ ## if still couldn't find the name, try 'rna' and then 'prot'
+ $symbol //= find_in_entrezgene ($result, ['gene', 'locus']);
+ $name //= find_in_entrezgene (
+ $result,
+ ['gene', 'desc'],
+ ['rna', 'ext', 'name'],
+ ['prot', 'name']
+ );
+
+
+ my $ensembl = find_in_entrezgene ($result, ['gene', 'db', 'Ensembl', 'tag', 'str']);
+
+ ## values for the gene-status (different from RefSeq status)
+ ## live good??
+ ## secondary synonym with merged
+ ## discontinued 'deleted', still index and display to public
+ ## newentry- for GeneRif submission
+ my $status = find_in_entrezgene ($result, ['track-info', 'status']);
+
+ if ($status eq 'discontinued') {
+ log_it (3, "Discontinued gene: UID='$uid', symbol='$symbol', name='$name'. Forgetting about it...");
+ next SEQ;
+ } elsif ($status eq 'secondary') {
+ ## recursivity! UUUUUUUUUU!
+ log_it (3, "Secondary gene: UID='$uid', symbol='$symbol', name='$name'. Attempting to find its current UID...");
+ my $current_id;
+ foreach my $c (@{$result->{'track-info'}->[0]->{'current-id'}}) {
+ next unless $c->{'db'} eq 'GeneID';
+ $current_id = $c->{'tag'}->[0]->{'id'};
+ next unless $current_id;
+ log_it (3, "Update: found current UID '$current_id' of secondary gene with UID='$uid'");
+ analyze_entrez_genes ($struct, [$current_id]);
+ }
+ log_it (3, "Update: could not find current UID of secondary gene with UID='$uid'") unless $current_id;
+ next SEQ;
+ } else {
+ if (!$status) {
+ log_it (1, "WARNING: couldn't find gene status for gene with UID='$uid'. Assuming value of 'live'");
+ $status = 'live-assumed';
+ }
+ my @extra_arguments;
+ my $ng_message = "New gene: UID='$uid', gene status='$status', symbol='$symbol', name='$name'";
+ if ($ensembl) {
+ $ng_message = $ng_message . ", EnsEMBL ID='$ensembl'";
+ push (@extra_arguments, ensembl => $ensembl);
+ }
+ log_it (3, "$ng_message");
+ $struct->add_gene(
+ uid => $uid,
+ status => $status,
+ name => $name,
+ symbol => $symbol,
+ @extra_arguments,
+ );
+ }
+
+ ## get the gene location (something like 1q21). And if we can't find
+ ## it in location, we look for it in gene maploc
+ my $locus = find_in_entrezgene (
+ $result,
+ ['location', 'display-str'],
+ ['gene', 'maploc']
+ );
+ if ($locus) {
+ $struct->add_gene(uid => $uid, locus => $locus);
+ } else {
+ log_it (1, "WARNING: couldn't find location for gene with UID='$uid'.");
+ }
+
+ ## get the species names
+ my $species = find_in_entrezgene ($result, ['source', 'org', 'taxname']);
+ if ($species) {
+ $struct->add_gene(uid => $uid, species => $species);
+ } else {
+ log_it (1, "WARNING: couldn't find species for gene with UID='$uid'.");
+ }
+
+ ## Type can be 'pseudo' or 'protein-coding'. That;s the only type of things
+ ## we handle. There are also, 'ncRNA' for non coding RNA, and even 'other'
+ ## for things such as regions
+ if ($result->{'type'} eq 'pseudo') {
+ log_it (3, "Update: gene with UID='$uid' is '". $result->{'type'} ."' gene. Marking as pseudo...");
+ $struct->add_gene(uid => $uid, pseudo => 1);
+ unless ($get_pseudo) {
+ log_it (3, "Update: removing gene with UID='$uid' for being pseudo...");
+ $struct->remove_gene($uid);
+ next SEQ;
+ }
+ } elsif ($result->{'type'} eq 'protein-coding') {
+ log_it (3, "Update: gene with UID='$uid' is '". $result->{'type'} ."' gene. Marking as protein-coding...");
+ $struct->add_gene(uid => $uid, pseudo => 0);
+ } else {
+ log_it (1, "WARNING: gene with UID='$uid' is of type '" . $result->{'type'} . "' . Skipping...");
+ $struct->remove_gene($uid);
+ next SEQ;
+ }
+
+ foreach my $l (@{$result->{'locus'}}){
+ $l = $l->[0] if(ref($l) eq 'ARRAY');
+ next unless ($l->{'heading'} && $l->{'heading'} =~ m/$assembly_regex/i);
+ my $assembly = $l->{'heading'};
+ my $ChrAccVer = $l->{'accession'};
+ my $ChrStart = find_in_entrezgene ($l, ['seqs', 'int', 'from' ]);
+ my $ChrStop = find_in_entrezgene ($l, ['seqs', 'int', 'to' ]);
+ my $ChrStrand = find_in_entrezgene ($l, ['seqs', 'int', 'strand']);
+ if ($ChrStrand eq 'plus') {
+ $ChrStrand = 1;
+ $ChrStart += 1 - $upstream;
+ $ChrStop += 1 + $downstream;
+ } else {
+ $ChrStrand = 2;
+ $ChrStart += 1 - $downstream;
+ $ChrStop += 1 + $upstream;
+ }
+ log_it (3, "Update: gene with UID='$uid' has Accesion number '$ChrAccVer' between coordinates $ChrStart ... $ChrStop on strand $ChrStrand.");
+ $struct->add_gene(
+ uid => $uid,
+ assembly => $assembly,
+ ChrAccVer => $ChrAccVer,
+ ChrStart => $ChrStart,
+ ChrStop => $ChrStop,
+ ChrStrand => $ChrStrand,
+ );
+ last; # if we got here once, no point in looking on the others
+ }
+
+ ## we will look for products accessions on the comments section instead of the
+ ## locus section because locus doesn't say the RefSeq status of the products
+ foreach my $c (@{$result->{'comments'}}){
+ $c = $c->[0] if(ref($c) eq 'ARRAY');
+
+ ## get RefSeq status
+ if ($c->{'heading'} && $c->{'heading'} eq 'RefSeq Status') {
+ my $refseq_status = $c->{'label'};
+ log_it (3, "Update: gene with UID='$uid' has RefSeq status='$refseq_status'");
+ $struct->add_gene(uid => $uid, 'RefSeq status' => $refseq_status);
+ }
+
+ ## the rest of the block only makes sense if it's not a pseudo gene
+ ## XXX Also, some genes enconde fancy types of RNA without an actual
+ ## protein product which we are not skipping. should we check if the
+ ## gene type has RNA somewhere on its name?
+ if ($struct->get_info('gene', $uid, 'pseudo') ) {
+ log_it (9, "DEBUG: finished analyzing gene with UID='$uid' earlier since it's pseudo gene " . by_caller_and_location('here') );
+ next SEQ;
+ }
+ next unless ($c->{'heading'} && $c->{heading} eq 'NCBI Reference Sequences (RefSeq)');
+ foreach my $cc (@{$c->{'comment'}}){
+ $cc = $cc->[0] if(ref($cc) eq 'ARRAY');
+ next unless $cc->{'heading'} eq 'RefSeqs maintained independently of Annotated Genomes';
+ foreach my $ccp (@{$cc->{'products'}}){
+ $ccp = $ccp->[0] if(ref($ccp) eq 'ARRAY');
+ my $mRNA_acc = $ccp->{'accession'};
+ my $prot_acc = $ccp->{'products'}->[0]->{'accession'};
+ ## for the RefSeq status needs to be on a loop since there's a list of comments
+ ## About RefSeq status of products:
+ ## http://www.ncbi.nlm.nih.gov/entrez/query/static/help/genefaq.html#faq_g7.2
+ ## XXX what can we do with this product RefSeq status?
+ my $ref_stat;
+ foreach my $ccpc (@{$ccp->{'comment'}}){
+ next unless ($ccpc->{'label'} && $ccpc->{'label'} eq 'RefSeq Status');
+ $ref_stat = $ccpc->{'text'};
+ }
+ ## some transcripts are non-coding. In those cases $prot_acc hence the
+ ## need for this. For those cases, we want to give no value for 'protein'
+ ## and not an empty string. The opposite (protein without corresponding
+ ## transcript) should not be possible
+ if (!$prot_acc) {
+ log_it (3, "Update: gene with UID='$uid' found to encode transcrip='$mRNA_acc' which is non-coding transcript.");
+ ## TODO some transcripts, do not encode a protein. This will create errors
+ ## must fix for genes such as UID 100507436
+ $struct->add_product(
+ type => 'transcript',
+ accession => $mRNA_acc,
+ gene => $uid,
+ coding => 0,
+ );
+ } else {
+ log_it (3, "Update: gene with UID='$uid' found to encode transcrip='$mRNA_acc' and protein='$prot_acc' with product RefStatus of '$ref_stat'.");
+ ## TODO some transcripts, do not encode a protein. This will create errors
+ ## must fix for genes such as UID 100507436
+ $struct->add_product(
+ type => 'transcript',
+ accession => $mRNA_acc,
+ gene => $uid,
+ protein => $prot_acc,
+ coding => 1,
+ );
+ $struct->add_product(
+ type => 'protein',
+ accession => $prot_acc,
+ gene => $uid,
+ transcript => $mRNA_acc,
+ );
+ }
+ }
+ }
+ }
+ unless (scalar($struct->get_product_list('transcript', $uid)) >= 1 &&
+ scalar($struct->get_product_list('protein', $uid)) >= 1) {
+ log_it (1, "WARNING: non-pseudo gene with UID='$uid' returned no protein and transcript.");
+ }
+
+ }
+}
+
+## Builds the Bio::DB::EUtilities 'efetch' agent used to download sequences.
+##   $searched - name of the Entrez database, passed on as -db
+## The rettype is derived from the script-wide $format: 'genbank' becomes
+## 'gb', 'fasta' stays 'fasta', anything else falls back to 'native' (with a
+## warning). Returns the configured fetcher.
+sub create_fetcher {
+  my ($searched) = @_;
+
+  my %rettype_for = (
+    genbank => 'gb',
+    fasta   => 'fasta',
+  );
+  my $rettype = $rettype_for{$format};
+  unless (defined $rettype) {
+    $rettype = 'native';
+    log_it (1, "WARNING: couldn't convert format '$format' to rettype. Using native as default.");
+  }
+
+  return Bio::DB::EUtilities->new(
+    -eutil   => 'efetch',
+    -db      => $searched,
+    -retmode => 'text',
+    -rettype => $rettype,
+    -email   => $email,
+  );
+}
+
+## Opens, for writing, the output file of one gene or product.
+##   $type        - 'gene', 'transcript' or 'protein'; selects the directory
+##   $product_key - gene UID or product accession of the record
+##   $name_key    - which piece of info of that record names the file
+##   $struct      - Structure object holding the records
+## Dies on an unknown $type or when the file cannot be opened. Returns the
+## open filehandle; closing it is up to the caller.
+sub get_filehandle {
+  my ($type, $product_key, $name_key, $struct) = @_;
+
+  my %dir_for = (
+    gene       => $gene_dir,
+    transcript => $mrna_dir,
+    protein    => $prot_dir,
+  );
+  unless (exists $dir_for{$type}) {
+    die "Found a bug. Unknow type provided '$type' to generate filename ". by_caller_and_location('before') ." Please report.";
+  }
+
+  ## file name is the sanitized value of the requested info plus the
+  ## extension matching the output format
+  my $basename = fix_filename ( $struct->get_info($type, $product_key, $name_key) )
+               . file_extension_for($format);
+  my $filepath = File::Spec->catfile ($dir_for{$type}, $basename);
+
+  open (my $filehandle, ">", $filepath) or die "Couldn't open '$filepath' for writing: $!";
+  return $filehandle;
+}
+
+## Downloads the genomic sequence of every gene in the structure and writes
+## each one to its own file (named after the info selected by $genes).
+## Genes without a chromosome accession ('ChrAccVer') are skipped.
+##   $struct - Structure object holding the genes
+sub get_genes {
+  my ($struct) = @_;
+  my @gene_uids = $struct->get_list('gene');
+  my $fetcher   = create_fetcher('nucleotide');
+
+  GENE:
+  for my $gene_uid (@gene_uids) {
+    log_it (2, "Fetching gene: trying to fetch gene with UID='$gene_uid'...");
+
+    my $chr_acc = $struct->get_info('gene', $gene_uid, 'ChrAccVer');
+    if (!$chr_acc) {
+      log_it (2, "Update: found no genomic info for gene with UID='$gene_uid'...");
+      next GENE;
+    }
+
+    my $filehandle = get_filehandle('gene', $gene_uid, $genes, $struct);
+    my %region = (
+      -id        => $chr_acc,
+      -seq_start => scalar $struct->get_info('gene', $gene_uid, 'ChrStart'),
+      -seq_stop  => scalar $struct->get_info('gene', $gene_uid, 'ChrStop'),
+      -strand    => scalar $struct->get_info('gene', $gene_uid, 'ChrStrand'),
+    );
+    $fetcher->set_parameters (%region);
+
+    log_it (3, "Update: fetching gene sequence for gene with UID='$gene_uid'...");
+    print $filehandle $fetcher->get_Response->content;
+    close $filehandle or warn $! ? "WARNING: error closing filehandle: $!" : "WARNING: exit status $? from filehandle";
+  }
+}
+
+## Downloads every transcript or protein in the structure, records the
+## description found in the downloaded record and saves the raw response
+## to a file.
+##   $product - 'transcript' or 'protein' (anything else is a bug and dies)
+##   $struct  - Structure object holding the products
+## Non-coding transcripts are skipped entirely when $get_noncoding is false.
+sub get_products {
+  my ($product, $struct) = @_;
+  my @product_acc = $struct->get_list($product);
+
+  my ($fetcher, $base_name);
+  if ($product eq 'transcript') {
+    $fetcher   = create_fetcher ('nuccore');
+    $base_name = $transcripts;
+  } elsif ($product eq 'protein') {
+    $fetcher   = create_fetcher ('protein');
+    $base_name = $proteins;
+  } else {
+    die "Bug found. Invalid product $product argument given ". by_caller_and_location('before') .". Please report it.";
+  }
+
+  ## ideally, there would be no loop, and we'd use $fetcher to get all the sequences in
+  ## one go. However, that would force us to get the sequences in Bio::Seq files which
+  ## can be different from the actually downloaded file. Better to not take the chance
+  foreach my $product_acc (@product_acc) {
+    if ( !$get_noncoding && $product eq 'transcript' && !$struct->get_info($product, $product_acc, 'coding') ) {
+      log_it (2, "Fetching $product: skipping $product with accession='$product_acc' since it's non-coding...");
+      ## really skip it: without this the sequence was still fetched and saved
+      next;
+    }
+    log_it (2, "Fetching $product: trying to fetch $product with accession='$product_acc'...");
+
+    $fetcher->set_parameters (
+      -id => $product_acc,
+    );
+    my $response = $fetcher->get_Response->content;
+
+    ## parse a copy of the response (through an in-memory filehandle) only to
+    ## learn the description; what gets saved is the untouched response
+    open(my $seq_fh, "<", \$response) or die "Could not open sequence string for reading: $!";
+    my $parser = Bio::SeqIO->new(
+      -fh     => $seq_fh,
+      -format => $format,
+    );
+    while (my $seq = $parser->next_seq) {
+      my $product_desc = $seq->description;
+      $struct->add_product(
+        type        => $product,
+        accession   => $product_acc,
+        description => $product_desc,
+      );
+    }
+    close $seq_fh;
+
+    my $filehandle = get_filehandle($product, $product_acc, $base_name, $struct);
+    print $filehandle $response;
+    ## buffered write errors only show up at close, so report them like get_genes does
+    close $filehandle or warn $! ? "WARNING: error closing filehandle: $!" : "WARNING: exit status $? from filehandle";
+  }
+}
+
+################################################################################
+## other small useful subs
+################################################################################
+
+
+## Makes sure the directory given as first argument exists: it is created
+## when missing, and it is a fatal error if the path exists but is not a
+## directory (or if it cannot be created).
+sub check_dir {
+  # TODO must check for permissions as well
+  my $dir = $_[0];
+  if (-d $dir) {
+    log_it (9, "DEBUG: directory '$dir' NOT created since it already exists.");
+  } elsif (-e $dir) {
+    die "Directory '$dir' to save output already exists as non-directory.";
+  } else {
+    mkdir $dir or die "Could not mkdir '$dir': $!";
+    log_it (9, "DEBUG: directory '$dir' created.");
+  }
+}
+
+
+## Reports a message.
+##   $level   - importance of the message; it is printed to STDOUT only when
+##              not greater than $verbosity
+##   $message - text to report
+## Everything is also written to the LOG filehandle, except debug messages
+## (level 9) which are only logged when $debug is set.
+sub log_it {
+  my ($level, $message) = @_;
+
+  if (!($level > $verbosity)) {
+    say STDOUT $message;
+  }
+
+  my $is_debug_message = ($level == 9);
+  if (!$is_debug_message || $debug) {
+    say LOG $message;
+  }
+}
+
+sub find_in_entrezgene {
+  ## we have to look into the mess of a structure returned by
+  ## Bio::ASN1::Entrezgene, causing us to make a bunch of checks.
+  ## This will return undef if the path does not exist, or the value
+  ## found. Only hash keys need to be specified, if an array is found
+  ## it will look into all of its elements. Second argument onwards
+  ## must be ARRAY refs to the hash keys. They will be looked in order
+  ## example:
+  ##    find_in_entrezgene ($seq, ['location', 'display-str'], ['gene', 'maploc']);
+  ##
+  ## The key lists are never modified: each recursion level works on its own
+  ## copy. Consuming a shared list would leave later array elements with a
+  ## truncated path, so only the first element could ever match.
+
+  my $seq = shift;
+  my $val;
+  foreach my $keys (@_) {
+    if (ref($seq) eq 'ARRAY') {
+      foreach my $element (@{$seq}) {
+        ## every element is searched with the complete remaining path
+        $val = find_in_entrezgene ($element, [@{$keys}]);
+        ## value may be false, but will be undefined if just not found
+        last if defined $val;
+      }
+    } elsif (ref($seq) eq 'HASH') {
+      my ($key, @remaining) = @{$keys};
+      ## no key left while still on a hash means the path does not lead to a value
+      if (defined $key && exists $seq->{$key}) {
+        $val = find_in_entrezgene ($seq->{$key}, \@remaining);
+      }
+    } elsif (!ref($seq)) {
+      ## not a reference, must be the value we are looking for
+      $val = $seq;
+    } else {
+      die "error when transversing entrezgene structure.\n";
+    }
+    last if defined $val;
+  }
+  return $val;
+}
+
+## Removes repeated elements from an array, in place. The first occurrence of
+## each value is kept and the original order of the values is preserved.
+##   $_[0] - reference to the array to clean
+sub clean_array {
+  my %seen;
+  my @unique;
+  foreach my $value (@{$_[0]}) {
+    if ($seen{$value}++) {
+      log_it (9, "DEBUG: value '$value' removed from array " . by_caller_and_location('here') . " called " . by_caller_and_location('before') );
+    } else {
+      push (@unique, $value);
+    }
+  }
+  @{$_[0]} = @unique;
+}
+
+## Returns the current local time formatted as "[YYYY-MM-DD hh:mm:ss]"
+sub get_time {
+  my @now = localtime;
+  ## localtime gives (sec, min, hour, mday, mon, year, ...) with a 0-based
+  ## month and the year counted from 1900
+  return sprintf ("[%04d-%02d-%02d %02d:%02d:%02d]", $now[5] + 1900, $now[4] + 1, @now[3, 2, 1, 0]);
+}
+
+
+## Sanitizes a filename: every character outside the allowed set (letters,
+## digits, space and - + . , ( ) { } [ ] ') is replaced by an underscore.
+## Returns the sanitized copy; the argument itself is left untouched.
+sub fix_filename {
+  my ($original) = @_;
+  (my $file = $original) =~ s/[^a-z0-9\-\+ \.,\(\){}\[\]']/_/ig;
+  log_it (9, "DEBUG: filepath '$original' was converted to '$file' " . by_caller_and_location('here') . " called " . by_caller_and_location('before') );
+  return $file;
+}
+
+## Returns a string "by SUBNAME at line N" describing a frame of the call
+## stack, to be appended to log and error messages.
+##   'here' (or no argument) - the sub that called this function
+##   'before'                - the sub one level further up
+##   a non-negative integer  - that many levels above 'here'
+## Dies on any other argument. When the requested frame does not exist (for
+## example when called from the main program rather than from a sub),
+## placeholders are used instead of undefined values.
+sub by_caller_and_location {
+  my $level;
+  if (!@_ || $_[0] eq 'here') {
+    $level = 1;
+  } elsif ($_[0] eq 'before'){
+    $level = 2;
+  } elsif ($_[0] =~ /^[0-9]+$/){
+    $level = 1 + $_[0];
+  } else {
+    die "Bug found when calculating level for caller function. Please report.";
+  }
+  ## caller() returns an empty list when there is no frame at that depth
+  my @frame = caller($level);
+  return "by " . ($frame[3] // 'main program') . " at line " . ($frame[2] // 'unknown');
+}
+
+
+## Returns the file extension (leading dot included) to use for a sequence
+## format name. The name is matched case-insensitively, anywhere in the
+## string, against the known formats in the order listed below; '.seq' is
+## returned (and a debug message logged) when none matches.
+sub file_extension_for {
+  ## TODO in some cases, extension changes whether it's protein or DNA or whatever
+  ##      and this should be supported
+  ## XXX  there must be a more elegant to handle the formats on this scripts
+  my ($format_name) = @_;
+
+  ## to update this list, look in the _guess_format method, inside SeqIO.pm of bioperl
+  my @extension_for = (
+    [ qr/embl/i       => '.embl'  ],
+    [ qr/entrezgene/i => '.asn'   ],
+    [ qr/fasta/i      => '.fasta' ],  # fasta|fast|fas|seq|fa|fsa|nt|aa|fna|faa
+    [ qr/fastq/i      => '.fastq' ],
+    [ qr/gcg/i        => '.gcg'   ],
+    [ qr/genbank/i    => '.gb'    ],  # gb|gbank|genbank|gbk|gbs
+    [ qr/swiss/i      => '.swiss' ],  # swiss|sp
+  );
+  foreach my $candidate (@extension_for) {
+    my ($pattern, $extension) = @{$candidate};
+    return $extension if $format_name =~ $pattern;
+  }
+
+  log_it (9, "DEBUG: couldn't find the right extension for the requested format. Using '.seq' as default.");
+  return ".seq";
+}
+
+## Saves the collected data in the format selected by $save_data. Only 'csv'
+## is handled; any other value saves nothing.
+##   first argument - Structure object to save
+sub save_structure {
+  my ($struct) = @_;
+  create_csv($struct) if $save_data eq 'csv';
+}
+
+## Writes the content of the structure to the file 'data.csv' inside the
+## $save directory: a header line followed by one line per transcript of each
+## gene (genes without transcripts still get one line, with empty transcript
+## and protein columns).
+##   $struct - Structure object to dump
+## Dies when the file cannot be opened, written to or closed.
+sub create_csv {
+  my $struct = shift;
+  my $csv = Text::CSV->new ({
+    binary => 1,
+    eol    => $/,
+  }) or die "Cannot use Text::CSV: ". Text::CSV->error_diag ();
+
+  my $csv_file = File::Spec->catfile ($save, 'data.csv');
+  open (my $fh, ">", $csv_file) or die "Couldn't open file $csv_file for writing: $!";
+
+  $csv->print ($fh, ['gene symbol', 'species', 'gene UID', 'EnsEMBL ID', 'gene name', 'pseudo', 'transcript accession','protein accession', 'locus', 'chromosome accession', 'chromosome start coordinates', 'chromosome stop coordinates', 'assembly'] )
+    or die "Couldn't write header to file $csv_file: " . $csv->error_diag ();
+
+  my @uids = $struct->get_list('gene');
+  foreach my $uid (@uids) {
+    my @mRNA_acc = $struct->get_product_list('transcript', $uid);
+    if (!@mRNA_acc) { @mRNA_acc = (''); } # this allows the next loop to run once for pseudo genes
+    foreach my $mRNA_acc (@mRNA_acc) {
+      ## the placeholder accession of genes without transcripts is not a real
+      ## record, so the structure is not asked about it
+      my $prot_acc = $mRNA_acc ne '' ? $struct->get_info('transcript', $mRNA_acc, 'protein') : undef;
+      my @line = (
+        scalar $struct->get_info('gene', $uid, 'symbol'),
+        scalar $struct->get_info('gene', $uid, 'species'),
+        $uid,
+        scalar $struct->get_info('gene', $uid, 'ensembl'),
+        scalar $struct->get_info('gene', $uid, 'name'),
+        scalar $struct->get_info('gene', $uid, 'pseudo'),
+        $mRNA_acc,
+        $prot_acc,
+        scalar $struct->get_info('gene', $uid, 'locus'),
+        scalar $struct->get_info('gene', $uid, 'ChrAccVer'),
+        scalar $struct->get_info('gene', $uid, 'ChrStart'),
+        scalar $struct->get_info('gene', $uid, 'ChrStop'),
+        scalar $struct->get_info('gene', $uid, 'assembly'),
+      );
+      $csv->print ($fh, \@line)
+        or die "Couldn't write to file $csv_file: " . $csv->error_diag ();
+    }
+  }
+  ## buffered write errors only surface when closing
+  close $fh or die "Couldn't close file $csv_file: $!";
+}
+
+
+################################################################################
+## Structure methods
+################################################################################
+package Structure;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Structure::VERSION = '1.75';
+## Constructor: returns a new, empty structure blessed into the given class
+sub new {
+  my ($class) = @_;
+  return bless ({}, $class);
+}
+
+## adds information to a specific gene and adds the gene to the structure if it doesn't exist
+## $object->add_gene (
+##                    uid        => gene_uid_of_the_gene,
+##                    gene_name  => gene_name,
+##                    other_info => corresponding value
+##                    );
+## Without a (true) 'uid' a warning is logged and nothing is added. Values of
+## keys already stored for the gene are overwritten.
+sub add_gene {
+  my $self = shift;
+  my %data = @_;
+  ## remove the value from the hash so it doesn't show up in the loop
+  my $gene_uid = delete $data{'uid'};
+  unless ($gene_uid) {
+    ## log_it lives in package main, it must be fully qualified from here
+    main::log_it (1, "WARNING: no gene UID supplied when adding new gene ". main::by_caller_and_location('before') );
+    return;
+  }
+  ## when adding a gene (instead of updating, this goes first. Can't just hope
+  ## to happen by itself on the loop later because it's possible to add a gene
+  ## with no info on it
+  if ( !exists($self->{'gene'}->{$gene_uid}) ) {
+    main::log_it (9, "DEBUG: creating new gene with UID='$gene_uid'.");
+    $self->{'gene'}->{$gene_uid} = {};
+    $self->{'gene'}->{$gene_uid}->{'uid'}        = $gene_uid; # this is not stupid. Makes it easier to have a general function to create the filename
+    $self->{'gene'}->{$gene_uid}->{'transcript'} = [];
+    $self->{'gene'}->{$gene_uid}->{'protein'}    = [];
+  }
+  ## fill it with all the data
+  foreach my $key (keys %data) {
+    $self->{'gene'}->{$gene_uid}->{$key} = $data{$key};
+    main::log_it (9, "DEBUG: added $key='$data{$key}' to gene with UID='$gene_uid'.");
+  }
+}
+
+## Deletes the records of the genes with the given UIDs from the structure,
+## logging a debug message for each of them.
+##    $object->remove_gene($uid)
+##    $object->remove_gene(@uids)
+sub remove_gene {
+  my ($self, @uids) = @_;
+  for my $uid (@uids) {
+    delete $self->{'gene'}->{$uid};
+    main::log_it (9, "DEBUG: removed gene with UID='$uid'");
+  }
+}
+
+## adds information to a transcript or protein, creating its record if needed
+## $object->add_product (
+##                       type       => 'transcript' or 'protein',
+##                       accession  => accession_of_the_product,
+##                       other_info => corresponding value
+##                       );
+## Dies if 'type' is neither 'transcript' nor 'protein'. Without a (true)
+## 'accession' a warning is logged and nothing is added. Special keys:
+##   gene       - the accession is also added to that gene's list of products
+##   transcript - (on a protein) if that transcript is already linked to a
+##                different protein, warn and relink it to this protein
+##   protein    - (on a transcript) if that protein is already linked to a
+##                different transcript, warn and relink it to this transcript
+sub add_product {
+  my $self = shift;
+  my %data = @_;
+  ## remove these values from the hash so they don't show up in the loop later
+  my $product = delete $data{'type'};
+  die "Bug found. Please report this. Product requested was $product ". main::by_caller_and_location('before') unless ($product eq 'protein' || $product eq 'transcript');
+  my $product_acc = delete $data{'accession'};
+  unless ($product_acc) {
+    main::log_it(1, "WARNING: no $product accession supplied when adding new product ". main::by_caller_and_location('before') . " Adding nothing." );
+    return;
+  }
+  ## Since it's possible that a record for a product be generated automatically when
+  ## creating it's related product, the only way to know if it's the first time, it's
+  ## to check it's accession (it's the same as the key. It looks kinda of redundant
+  ## but allows to have a simpler function to generate filename that uses get_info)
+  if ( !exists($self->{$product}->{$product_acc}->{'accession'}) ) {
+    main::log_it (9, "DEBUG: creating new $product with accession='$product_acc'.");
+    $self->{$product}->{$product_acc}->{'accession'} = $product_acc;
+  }
+  ## fill it with all the data
+  foreach my $key (keys %data) {
+    $self->{$product}->{$product_acc}->{$key} = $data{$key};
+    ## if we're adding info about gene and related products, the array on their
+    ## part of structure needs to be updated
+    if ($key eq 'gene') {
+      my $products_on_gene = \@{$self->{'gene'}->{$data{$key}}->{$product}};
+      push (@$products_on_gene, $product_acc);
+      main::clean_array($products_on_gene);
+    } elsif ($key eq 'transcript' && $product eq 'protein') {
+      my $transcript_acc = $data{$key};
+      my $current = $self->{'transcript'}->{$transcript_acc}->{'protein'};
+      if ($current && $current ne $product_acc) {
+        warn "WARNING: replacing accession $current with $product_acc as product of $transcript_acc. Please report this bug.";
+        ## the link belongs to the record of that transcript, not to a
+        ## pseudo-record keyed 'protein' in the list of transcripts
+        $self->{'transcript'}->{$transcript_acc}->{'protein'} = $product_acc;
+      }
+    } elsif ($key eq 'protein' && $product eq 'transcript') {
+      my $protein_acc = $data{$key};
+      my $current = $self->{'protein'}->{$protein_acc}->{'transcript'};
+      if ($current && $current ne $product_acc) {
+        warn "WARNING: replacing accession $current with $product_acc as ''template'' of $protein_acc. Please report this bug.";
+        ## same as above: update the record of that protein
+        $self->{'protein'}->{$protein_acc}->{'transcript'} = $product_acc;
+      }
+    }
+  }
+}
+## get information from the structure
+##    $value = $object->get_info('gene', $gene_id, 'wanted info');
+##    $value = $object->get_info('protein', $protein_acc, 'wanted info');
+## Always returns exactly one value, undef when the record or the requested
+## info does not exist. Asking about an unknown record does not create it.
+sub get_info {
+  my ($self, $type, $key, $req) = @_;
+  ## can't check the request here and return the key if the request is accession
+  ## or id because even though we're using product accessions and gene UID as keys,
+  ## gene also have accessions and products have id and if we had support for them
+  ## later, it would create a lot of confusion and bugs
+
+  ## walk down one level at a time: a plain $self->{$type}->{$key}->{$req}
+  ## would autovivify the record, which then shows up in get_list
+  my $record = exists $self->{$type} ? $self->{$type}->{$key} : undef;
+  ## explicit undef on purpose: callers use this inside lists of key => value
+  ## pairs and rely on getting one element back
+  return ref($record) eq 'HASH' ? $record->{$req} : undef;
+}
+
+## Returns the keys under which records of the given type are stored: the
+## gene UIDs for 'gene', the accessions for 'transcript' and 'protein'.
+##    @gene_uids = $structure->get_list('gene')
+##    @mRNA_acc  = $structure->get_list('transcript')
+sub get_list {
+  my ($self, $type) = @_;
+  return keys %{ $self->{$type} };
+}
+
+## for the specified genes UIDs returns list of the requested products accessions
+## If no gene_id is specified, returns a list of all accessions of the product requested
+##    @mRNA_acc = structure->get_product_list('transcript', $gene_uid)
+##    @prot_acc = structure->get_product_list('protein', @gene_uid)
+##    @prot_acc = structure->get_product_list('protein')
+## A gene UID that is not in the structure (never added, or removed) simply
+## contributes no accessions. In scalar context the number of accessions is
+## returned.
+sub get_product_list {
+  my ($self, $product, @gene_uids) = @_;
+
+  if (!@gene_uids) {
+    return $self->get_list($product);
+  }
+  if (@gene_uids > 1) {
+    return map { $self->get_product_list($product, $_) } @gene_uids;
+  }
+
+  ## single gene: look the record up without creating it, so that an
+  ## unknown UID is neither fatal nor added to the structure
+  my $uid       = $gene_uids[0];
+  my $gene      = exists $self->{'gene'} ? $self->{'gene'}->{$uid} : undef;
+  my $accession = ($gene && $gene->{$product}) || [];
+  return @{$accession};
+}
+
+## this =back closes the last point on the NOTES on usage section
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+bp_genbank_ref_extractor - Retrieves all related sequences for a list of searches on Entrez gene
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+B<bp_genbank_ref_extractor> [options] [Entrez Gene Queries]
+
+=head1 DESCRIPTION
+
+This script searches on I<Entrez Gene> database and retrieves not only the gene sequence but
+also the related transcript and protein sequences.
+
+The gene UIDs of multiple searches are collected before attempting to retrieve them so each gene
+will only be analyzed once even if appearing as result on more than one search.
+
+Note that I<by default no sequences are saved> (see options and examples).
+
+=head1 OPTIONS
+
+Several options can be used to fine tune the script behaviour. It is possible to obtain extra
+base pairs upstream and downstream of the gene, control the naming of files and genome assembly to use.
+
+See the section bugs for problems when using default values of options.
+
+=over
+
+=item B<--assembly>
+
+When retrieving the sequence, a specific assembly can be defined. The value expected
+is a regex that will be case-insensitive. If it matches more than one assembly, it will
+use the first match. It defaults to C<(primary|reference) assembly>.
+
+=item B<--debug>
+
+If set, even more output will be printed that may help on debugging. Unlike the messages
+from B<--verbose> and B<--very-verbose>, these will not appear on the log file
+unless this option is selected. This option also sets B<--very-verbose>.
+
+=item B<--downstream>, B<--down>
+
+Specifies the number of extra base pairs to be retrieved downstream of the gene.
+This extra base pairs will only affect the gene sequence, not the transcript or proteins.
+
+=item B<--email>
+
+A valid email used to connect to the NCBI servers. This may be used by
+NCBI to contact users in case of problems and before blocking access in
+case of heavy usage.
+
+=item B<--format>
+
+Specifies the format that the sequences will be saved. Defaults to I<genbank> format.
+Valid formats are 'genbank' or 'fasta'.
+
+=item B<--genes>
+
+Specifies the name for gene file. By default, they are not saved. If no value is given
+defaults to its UID. Possible values are 'uid', 'name', 'symbol' (the official symbol or
+nomenclature).
+
+=item B<--help>
+
+Display the documentation (this text).
+
+=item B<--limit>
+
+When making a query, limit the result to these first specific results. This is to
+prevent the use of specially unspecific queries and a warning will be given if a
+query returns more results than the limit. The default value is 200. Note that
+this limit is for I<each> search.
+
+=item B<--non-coding>, B<--nonon-coding>
+
+Some protein coding genes have transcripts that are non-coding. By default, these sequences are
+saved as well. B<--nonon-coding> can be used to ignore those transcripts.
+
+=item B<--proteins>
+
+Specifies the name for proteins file. By default, they are not saved. If no value is given
+defaults to its accession. Possible values are 'accession', 'description', 'gene' (the corresponding
+gene ID) and 'transcript' (the corresponding transcript accession).
+
+Note that if not using 'accession' it is possible for files to be overwritten. It is possible for the same gene
+to encode more than one protein or different proteins to have the same description.
+
+=item B<--pseudo>, B<--nopseudo>
+
+By default, sequences of pseudo genes will be saved. B<--nopseudo> can be used to ignore those genes.
+
+=item B<--save>
+
+Specifies the path for the directory where the sequence and log files will be saved. If the
+directory does not exist it will be created although the path to it must exist. Files on the
+directory may be rewritten if necessary. If unspecified, a directory named F<extracted sequences>
+on the current directory will be used.
+
+=item B<--save-data>
+
+This option saves the data (gene UIDs, description, product accessions, etc) to
+a file. As an optional value, the file format can be specified. Defaults to CSV.
+
+Currently only CSV is supported.
+
+Saving the data structure as a CSV file, requires the installation of the Text::CSV module.
+
+=item B<--transcripts>, B<--mrna>
+
+Specifies the name for transcripts file. By default, they are not saved. If no value is given
+defaults to its accession. Possible values are 'accession', 'description', 'gene' (the corresponding
+gene ID) and 'protein' (the protein the transcript encodes).
+
+Note that if not using 'accession' it is possible for files to be overwritten. It is possible for the same gene
+to have more than one transcript or different transcripts to have the same description. Also, non-coding
+transcripts will create problems if using 'protein'.
+
+=item B<--upstream>, B<--up>
+
+Specifies the number of extra base pairs to be extracted upstream of the gene.
+This extra base pairs will only affect the gene sequence, not the transcript or proteins.
+
+=item B<--verbose>, B<--v>
+
+If set, program becomes verbose. For an extremely verbose program, use B<--very-verbose> instead.
+
+=item B<--very-verbose>, B<--vv>
+
+If set, program becomes extremely verbose. Setting this option, automatically sets B<--verbose> as well.
+For help in debugging, consider using B<--debug>
+
+=back
+
+=head1 EXAMPLES
+
+=over
+
+=item C<bp_genbank_ref_extractor --transcripts=accession '"homo sapiens"[organism] AND H2B'>
+
+Search Entrez gene with the query C<'"homo sapiens"[organism] AND H2B'>, and
+save their transcripts sequences. Note that default value of B<--limit> may only extract
+some of the hits.
+
+=item C<bp_genbank_ref_extractor --transcripts=accession --proteins=accession --format=fasta '"homo sapiens"[organism] AND H2B' '"homo sapiens"[organism] AND MCPH1'>
+
+Same as first example but also searches for C<'"homo sapiens"[organism] AND MCPH1'>,
+proteins sequences, and saves them in the fasta format.
+
+=item C<bp_genbank_ref_extractor --genes --up=100 --down=500 '"homo sapiens"[organism] AND H2B'>
+
+Same search as first example but saves the genomic sequences instead including
+100 and 500 bp upstream and downstream.
+
+=item C<bp_genbank_ref_extractor --genes --assembly='Alternate HuRef' '"homo sapiens"[organism] AND H2B'>
+
+Same search as first example but saves genomic sequences and from the Alternate HuRef genome assembly instead.
+
+=item C<bp_genbank_ref_extractor --save-data=CSV '"homo sapiens"[organism] AND H2B'>
+
+Same search as first example but does not save any sequence but saves all the results in a CSV file.
+
+=item C<bp_genbank_ref_extractor --save='search results' --genes=name --upstream=200 --downstream=500 --nopseudo --nonon-coding --transcripts --proteins --format=fasta --save-data=CSV '"homo sapiens"[organism] AND H2B' '"homo sapiens"[organism] AND MCPH1'>
+
+Searches on Entrez gene for both C<'"homo sapiens"[organism] AND H2B'> and C<'"homo sapiens"[organism] AND MCPH1'>
+and saves the gene sequences of all hits (not passing the default limit and ignoring pseudogenes) plus 200 and 500bp
+upstream and downstream of them. It will also save the sequences of all transcripts and proteins of each gene (but
+ignoring non-coding transcripts). It will save the sequences in the fasta format, inside a directory C<search results>,
+and save the results in a CSV file
+
+=back
+
+=head1 KNOWN BUGS
+
+=over
+
+=item *
+
+When supplying options, it's possible to not supply a value and use their default. However,
+when the expected value is a string, the next argument may be confused as value for the
+option. For example, when using the following command:
+
+C<bp_genbank_ref_extractor --transcripts 'H2A AND homo sapiens'>
+
+we mean to search for 'H2A AND homo sapiens' saving only the transcripts and using the default
+as base for the filename. However, the search terms will be interpreted as the base for the
+filenames (but since it's not a valid identifier, it will return an error). To prevent
+this, you can either specify the values:
+
+C<bp_genbank_ref_extractor --transcripts 'accession' 'H2A AND homo sapiens'>
+
+C<bp_genbank_ref_extractor --transcripts='accession' 'H2A AND homo sapiens'>
+
+or you can use the double dash to stop processing options. Note that this should only be used
+after the last option. All arguments supplied after the double dash will be interpreted as search terms
+
+C<bp_genbank_ref_extractor --transcripts -- 'H2A AND homo sapiens'>
+
+=back
+
+=head1 NOTES ON USAGE
+
+=over
+
+=item *
+
+Genes that are marked as 'live' and 'protein-coding' should have at least one
+transcript. However, This is not always true due to mistakes on annotation. Such
+cases will throw a warning. When faced with this, be nice and write to the entrez
+RefSeq maintainers L<http://www.ncbi.nlm.nih.gov/RefSeq/update.cgi>.
+
+=item *
+
+When creating the directories to save the files, if the directory already exists it will be used and no error
+or warning will be issued unless B<--debug> has been set. If a non-directory file already exists with that name
+bp_genbank_ref_extractor exits with an error.
+
+=item *
+
+On the subject of verbosity, all messages are saved on the log file. The options
+B<--verbose> and B<--very-verbose> only affect their printing to standard
+output. Debug messages are different as they will only show up (and be logged)
+if requested with B<--debug>.
+
+=item *
+
+When saving a file, to avoid problems with limited filesystems such as NTFS or FAT, only some
+characters are allowed. All other characters will be replaced by an underscore. Allowed characters
+are:
+
+B<a-z 0-9 - + . , () {} []'>
+
+=item *
+
+B<bp_genbank_ref_extractor> tries to use the same file extensions that bioperl
+would expect when saving the file. If unable it will use the '.seq' extension.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/%%7Bdist%7D
+
+=head1 AUTHOR
+
+Carnë Draug <carandraug+dev at gmail.com>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2011-2015 by Carnë Draug.
+
+This software is available under the GNU General Public License, Version 3, June 2007.
+
+=cut
diff --git a/dist.ini b/dist.ini
new file mode 100644
index 0000000..19e79fc
--- /dev/null
+++ b/dist.ini
@@ -0,0 +1,24 @@
+name = Bio-EUtilities
+version = 1.75
+author = cjfields <cjfields at cpan.org>
+license = Perl_5
+copyright_holder = Chris Fields
+copyright_year = 2013
+
+[@Filter]
+-bundle = @BioPerl
+-remove = Test::NoTabs ; because CoordinateMapper.t and GeneCoordinateMapper.t require tabs
+-remove = PodCoverageTests
+-remove = AutoPrereqs
+
+[Prereqs / Requires]
+LWP::UserAgent = 0
+XML::Simple = 0
+Bio::DB::GenericWebAgent = 0
+Bio::ParameterBaseI = 0
+Bio::Root::IO = 0
+Bio::Root::Root = 0
+
+[AutoPrereqs]
+skip = ^inc
+skip = ^blib
diff --git a/inc/TestHelper.pm b/inc/TestHelper.pm
new file mode 100644
index 0000000..b49a6f2
--- /dev/null
+++ b/inc/TestHelper.pm
@@ -0,0 +1,64 @@
+package inc::TestHelper;
+use strict;
+use warnings;
+use File::Spec;
+use File::Temp qw(tempdir);
+require Exporter;
+our @ISA = qw(Exporter);
+our @EXPORT_OK = qw(test_output_file test_output_dir test_input_file);
+my @TEMP_FILES;    # File::Temp objects held until exit so their files outlive the call
+
+=head2 test_output_file
+
+ Title : test_output_file
+ Usage : my $output_file = test_output_file();
+ Function: Get the full path of a file suitable for writing to.
+ When your test script ends, the file will be automatically deleted.
+ Returns : string (file path)
+ Args : none
+
+=cut
+
+sub test_output_file {
+    die "test_output_file takes no args\n" if @_;
+    my $tmp = File::Temp->new();
+    push @TEMP_FILES, $tmp;    # defer File::Temp's unlink-on-destroy to program exit
+    close($tmp);               # Windows needs this
+    return $tmp->filename;
+}
+
+=head2 test_output_dir
+
+ Title : test_output_dir
+ Usage : my $output_dir = test_output_dir();
+ Function: Get the full path of a directory suitable for storing temporary
+ files in. When your test script ends, the directory and its contents
+ will be automatically deleted.
+ Returns : string (path)
+ Args : none
+
+=cut
+
+sub test_output_dir {
+    die "test_output_dir takes no args\n" if @_;
+    return tempdir(CLEANUP => 1);    # CLEANUP asks File::Temp to remove the directory at exit
+}
+
+=head2 test_input_file
+
+ Title : test_input_file
+ Usage : my $input_file = test_input_file();
+ Function: Get the path of a desired input file stored in the standard location
+ (currently t/data), but correct for all platforms.
+ Returns : string (file path)
+ Args : list of strings (ie. at least the input filename, preceded by the
+ names of any subdirectories within t/data)
+ eg. for the file t/data/in.file pass 'in.file', for the file
+ t/data/subdir/in.file, pass ('subdir', 'in.file')
+
+=cut
+
+sub test_input_file {
+    return File::Spec->catfile('t', 'data', @_);    # portable join below t/data
+}
+1;
diff --git a/lib/Bio/DB/EUtilities.pm b/lib/Bio/DB/EUtilities.pm
new file mode 100644
index 0000000..9606071
--- /dev/null
+++ b/lib/Bio/DB/EUtilities.pm
@@ -0,0 +1,1257 @@
+package Bio::DB::EUtilities;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::DB::EUtilities::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::DB::GenericWebAgent);
+use Bio::Tools::EUtilities::EUtilParameters;
+use Bio::Tools::EUtilities;
+
+# ABSTRACT: Webagent which interacts with and retrieves data from NCBI's eUtils.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+sub new {
+    my ($class, @args) = @_;    # the same argument list feeds both the agent and its parameters
+    my $self = $class->SUPER::new(@args);
+    my $params = Bio::Tools::EUtilities::EUtilParameters->new(
+        -verbose => $self->verbose, @args);
+    # cache the parameter object on the agent
+    $self->parameter_base($params);
+    return $self;
+}
+
+
+
+
+
+sub get_Response {
+    my ($self, @request_args) = @_;
+    $self->warn('The -email parameter is now required, per NCBI E-utilities policy') unless $self->parameter_base->email();
+    return $self->SUPER::get_Response(@request_args);    # the parent class performs the request
+}
+
+
+
+
+
+sub delay_policy {
+    # seconds to wait between requests; the invocant is not consulted
+    return 1 / 3;
+}
+
+
+sub get_Parser {
+    my ($self) = @_;
+    my $params = $self->parameter_base;
+    # reuse the cached parser unless the parameters changed or none exists yet
+    unless ($params->parameters_changed || !$self->{'_parser'}) {
+        return $self->{'_parser'};
+    }
+    my $eutil = $params->eutil;
+    $self->throw("No parser defined for efetch; use get_Response() directly")
+        if $eutil eq 'efetch';
+    # pipe/tempfile support, if it is ever added, would probably belong here
+    $self->{'_parser'} = Bio::Tools::EUtilities->new(
+        -eutil      => $eutil,
+        -response   => $self->get_Response,
+        -parameters => $params,
+        -verbose    => $self->verbose,
+    );
+    return $self->{'_parser'};
+}
+
+
+
+sub set_parameters {
+    my $self = shift;
+    # forget the cached parser so it is never reused with the new parameters
+    delete $self->{'_parser'};
+    return $self->parameter_base->set_parameters(@_);
+}
+
+
+sub reset_parameters {
+    my $self = shift;
+    # forget the cached parser so it is never reused after the reset
+    delete $self->{'_parser'};
+    return $self->parameter_base->reset_parameters(@_);
+}
+
+
+sub available_parameters {    # pass-through to the parameter object
+    my $self = shift;
+    return $self->parameter_base->available_parameters(@_);
+}
+
+
+sub get_parameters {    # pass-through to the parameter object
+    my $self = shift;
+    return $self->parameter_base->get_parameters(@_);
+}
+
+
+sub get_parameter_values {
+    my ($self, $name) = @_;
+    my %value_of = $self->parameter_base->get_parameters(-list => [$name]);
+    return unless exists $value_of{$name};
+    my $value = $value_of{$name};
+    # array refs are flattened into a list; anything else is returned as-is
+    return ref($value) eq 'ARRAY' ? @{$value} : $value;
+}
+
+
+
+sub eutil {    # pass-through to the parser's eutil()
+    my $self = shift;
+    return $self->get_Parser->eutil(@_);
+}
+
+
+sub datatype {    # pass-through to the parser's datatype()
+    my $self = shift;
+    return $self->get_Parser->datatype(@_);
+}
+
+
+sub to_string {    # pass-through to the parser's to_string()
+    my $self = shift;
+    return $self->get_Parser->to_string(@_);
+}
+
+
+sub print_all {    # pass-through to the parser's print_all()
+    my $self = shift;
+    return $self->get_Parser->print_all(@_);
+}
+
+
+sub get_ids {    # pass-through to the parser's get_ids()
+    my $self = shift;
+    return $self->get_Parser->get_ids(@_);
+}
+
+
+sub get_database {    # pass-through to the parser's get_database()
+    my $self = shift;
+    return $self->get_Parser->get_database(@_);
+}
+
+
+sub get_db {    # pass-through to the parser's get_db()
+    my $self = shift;
+    return $self->get_Parser->get_db(@_);
+}
+
+
+sub get_databases {    # pass-through to the parser's get_databases()
+    my $self = shift;
+    return $self->get_Parser->get_databases(@_);
+}
+
+
+sub get_dbs {    # alias: forwards to the parser's get_databases()
+    my $self = shift;
+    return $self->get_Parser->get_databases(@_);
+}
+
+
+sub next_History {    # pass-through to the parser's next_History()
+    my $self = shift;
+    return $self->get_Parser->next_History(@_);
+}
+
+
+sub next_cookie {    # alias: forwards to the parser's next_History()
+    my $self = shift;
+    return $self->get_Parser->next_History(@_);
+}
+
+
+sub get_Histories {    # pass-through to the parser's get_Histories()
+    my $self = shift;
+    return $self->get_Parser->get_Histories(@_);
+}
+
+
+sub get_count {    # pass-through to the parser's get_count()
+    my $self = shift;
+    return $self->get_Parser->get_count(@_);
+}
+
+
+sub get_term {    # pass-through to the parser's get_term()
+    my $self = shift;
+    return $self->get_Parser->get_term(@_);
+}
+
+
+sub get_translation_from {    # pass-through to the parser's get_translation_from()
+    my $self = shift;
+    return $self->get_Parser->get_translation_from(@_);
+}
+
+
+sub get_translation_to {    # pass-through to the parser's get_translation_to()
+    my $self = shift;
+    return $self->get_Parser->get_translation_to(@_);
+}
+
+
+sub get_retstart {    # pass-through to the parser's get_retstart()
+    my $self = shift;
+    return $self->get_Parser->get_retstart(@_);
+}
+
+
+sub get_retmax {    # pass-through to the parser's get_retmax()
+    my $self = shift;
+    return $self->get_Parser->get_retmax(@_);
+}
+
+
+sub get_query_translation {    # pass-through to the parser's get_query_translation()
+    my $self = shift;
+    return $self->get_Parser->get_query_translation(@_);
+}
+
+
+sub get_corrected_query {    # pass-through to the parser's get_corrected_query()
+    my $self = shift;
+    return $self->get_Parser->get_corrected_query(@_);
+}
+
+
+sub get_replaced_terms {    # pass-through to the parser's get_replaced_terms()
+    my $self = shift;
+    return $self->get_Parser->get_replaced_terms(@_);
+}
+
+
+sub next_GlobalQuery {    # pass-through to the parser's next_GlobalQuery()
+    my $self = shift;
+    return $self->get_Parser->next_GlobalQuery(@_);
+}
+
+
+sub get_GlobalQueries {    # pass-through to the parser's get_GlobalQueries()
+    my $self = shift;
+    return $self->get_Parser->get_GlobalQueries(@_);
+}
+
+
+sub print_GlobalQueries {    # pass-through to the parser's print_GlobalQueries()
+    my $self = shift;
+    return $self->get_Parser->print_GlobalQueries(@_);
+}
+
+
+sub next_DocSum {    # pass-through to the parser's next_DocSum()
+    my $self = shift;
+    return $self->get_Parser->next_DocSum(@_);
+}
+
+
+sub get_DocSums {    # pass-through to the parser's get_DocSums()
+    my $self = shift;
+    return $self->get_Parser->get_DocSums(@_);
+}
+
+
+sub print_DocSums {    # pass-through to the parser's print_DocSums()
+    my $self = shift;
+    return $self->get_Parser->print_DocSums(@_);
+}
+
+
+sub get_available_databases {    # pass-through to the parser's get_available_databases()
+    my $self = shift;
+    return $self->get_Parser->get_available_databases(@_);
+}
+
+
+sub get_record_count {    # pass-through to the parser's get_record_count()
+    my $self = shift;
+    return $self->get_Parser->get_record_count(@_);
+}
+
+
+sub get_last_update {    # pass-through to the parser's get_last_update()
+    my $self = shift;
+    return $self->get_Parser->get_last_update(@_);
+}
+
+
+sub get_menu_name {    # pass-through to the parser's get_menu_name()
+    my $self = shift;
+    return $self->get_Parser->get_menu_name(@_);
+}
+
+
+sub get_description {    # pass-through to the parser's get_description()
+    my $self = shift;
+    return $self->get_Parser->get_description(@_);
+}
+
+
+sub next_FieldInfo {    # pass-through to the parser's next_FieldInfo()
+    my $self = shift;
+    return $self->get_Parser->next_FieldInfo(@_);
+}
+
+
+sub get_FieldInfo {    # pass-through to the parser's get_FieldInfo()
+    my $self = shift;
+    return $self->get_Parser->get_FieldInfo(@_);
+}
+
+*get_FieldInfos = \&get_FieldInfo;    # plural name is an alias for the same sub
+
+
+sub next_LinkInfo {    # pass-through to the parser's next_LinkInfo()
+    my $self = shift;
+    return $self->get_Parser->next_LinkInfo(@_);
+}
+
+
+sub get_LinkInfo {    # pass-through to the parser's get_LinkInfo()
+    my $self = shift;
+    return $self->get_Parser->get_LinkInfo(@_);
+}
+
+*get_LinkInfos = \&get_LinkInfo;    # plural name is an alias for the same sub
+
+
+sub print_FieldInfo {    # pass-through to the parser's print_FieldInfo()
+    my $self = shift;
+    return $self->get_Parser->print_FieldInfo(@_);
+}
+
+
+sub print_LinkInfo {    # pass-through to the parser's print_LinkInfo()
+    my $self = shift;
+    return $self->get_Parser->print_LinkInfo(@_);
+}
+
+
+sub next_LinkSet {    # pass-through to the parser's next_LinkSet()
+    my $self = shift;
+    return $self->get_Parser->next_LinkSet(@_);
+}
+
+
+# add support for retrieval of data if lazy parsing is enacted
+
+sub get_LinkSets {    # pass-through to the parser's get_LinkSets()
+    my $self = shift;
+    return $self->get_Parser->get_LinkSets(@_);
+}
+
+
+sub print_LinkSets {    # pass-through to the parser's print_LinkSets()
+    my $self = shift;
+    return $self->get_Parser->print_LinkSets(@_);
+}
+
+
+sub get_linked_databases {    # pass-through to the parser's get_linked_databases()
+    my $self = shift;
+    return $self->get_Parser->get_linked_databases(@_);
+}
+
+
+
+sub rewind {    # only the first argument is handed on to the parser's rewind()
+    my ($self, $target) = @_;
+    return $self->get_Parser->rewind($target);
+}
+
+
+sub generate_iterator {    # pass-through to the parser's generate_iterator()
+    my $self = shift;
+    return $self->get_Parser->generate_iterator(@_);
+}
+
+
+sub callback {    # pass-through to the parser's callback()
+    my $self = shift;
+    return $self->get_Parser->callback(@_);
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::DB::EUtilities - Webagent which interacts with and retrieves data from NCBI's eUtils.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ my $eutil = Bio::DB::EUtilities->new(
+ -eutil => 'esearch',
+ -term => 'BRCA1',
+ -db => 'pubmed',
+ -retmax => 10000,
+ -email => 'foo at bar.org'); # please use your real email
+
+ # eutil => any of esearch, esummary, elink
+ @ids = $eutil->get_ids(); # returns array or array ref of IDs
+
+ # eutil => any of egquery, espell
+
+ $term = $eutil->get_term(); # returns the search term (string)
+
+ # eutil => any of elink, einfo
+
+ $db = $eutil->get_database(); # returns database
+
+ # Query-related methods (esearch, egquery, espell data)
+ # eutil data centered on use of search terms
+
+ my $ct = $eutil->get_count; # uses optional database for egquery count
+ my $translation = $eutil->get_query_translation;
+
+ my $corrected = $eutil->get_corrected_query; # espell
+
+ while (my $gquery = $eutil->next_GlobalQuery) {
+ # iterates through egquery data
+ }
+
+ # Info-related methods (einfo data)
+ # database-related information
+
+ my $desc = $eutil->get_description;
+ my $update = $eutil->get_last_update;
+ my $nm = $eutil->get_menu_name;
+ my $ct = $eutil->get_record_count;
+
+ while (my $field = $eutil->next_FieldInfo) {
+ # ...
+ }
+ while (my $field = $eutil->next_LinkInfo) {
+ # ...
+ }
+
+ # History methods (epost data, some data returned from elink)
+ # data which enables one to retrieve and query against user-stored
+ # information on the NCBI server
+
+ while (my $cookie = $eutil->next_History) {
+ # ...
+ }
+
+ my @hists = $eutil->get_Histories;
+
+ # Bio::Tools::EUtilities::Summary (esummary data)
+ # information on a specific database record
+
+ # retrieve nested docsum data
+ while (my $docsum = $eutil->next_DocSum) {
+ print "ID:",$docsum->get_ids,"\n";
+ while (my $item = $docsum->next_Item) {
+ # do stuff here...
+ while (my $listitem = $item->next_ListItem) {
+ # do stuff here...
+ while (my $structure = $listitem->next_Structure) {
+ # do stuff here...
+ }
+ }
+ }
+ }
+
+ # retrieve flattened item list per DocSum
+ while (my $docsum = $eutil->next_DocSum) {
+ my @items = $docsum->get_all_DocSum_Items;
+ }
+
+=head1 DESCRIPTION
+
+This is a general webagent which posts and retrieves data to NCBI's eUtils
+service using their CGI interface. A separate project, currently present in
+BioPerl-Run, utilizes the SOAP-based interface for eUtils.
+
+A full explanation of the eUtils interfaces is not possible within this short
+module; please see the BioPerl wiki HOWTO for more details.
+
+=head1 METHODS
+
+=head2
+
+=head1 TODO
+
+=over 3
+
+=item * Finish documentation
+
+HOWTOs (both standard and Cookbook).
+
+=item * Cookbook tests
+
+Set up dev-only tests for Cookbook examples to make sure they are consistently
+updated.
+
+=item * API
+
+Mark Jensen has written up the SOAP-based eUtil modules, maybe we should
+coalesce around a consistent API between the two (they are close).
+
+=item * Carryover of parameters
+
+Maybe add a default but configurable list of parameters that can be carried over
+between calls.
+
+=item * Make setting certain parameters consistent
+
+Setting history is a bit inconsistent, so maybe use a common alias for this?
+
+=item * Moosify?
+
+Not necessary, but it might make things easier and more maintainable in the long
+run.
+
+=back
+
+=head1 Bio::DB::GenericWebAgent methods
+
+=head1 GenericWebDBI methods
+
+=head2 parameter_base
+
+ Title : parameter_base
+ Usage : $dbi->parameter_base($pobj);
+ Function: Get/Set Bio::ParameterBaseI.
+ Returns : Bio::ParameterBaseI object
+ Args : Bio::ParameterBaseI object
+
+=head2 ua
+
+ Title : ua
+ Usage : $dbi->ua;
+ Function: Get/Set LWP::UserAgent.
+ Returns : LWP::UserAgent
+ Args : LWP::UserAgent
+
+=head2 get_Response
+
+ Title : get_Response
+ Usage : $agent->get_Response;
+ Function: Get the HTTP::Response object by passing it an HTTP::Request (generated from
+ Bio::ParameterBaseI implementation).
+ Returns : HTTP::Response object or data if callback is used
+ Args : (optional)
+
+ -cache_response - flag to cache HTTP::Response object;
+ Default is 1 (TRUE, caching ON)
+
+ These are passed on to LWP::UserAgent::request() if stipulated
+
+ -cb - use a LWP::UserAgent-compliant callback
+ -file - dumps the response to a file (handy for large responses)
+ Note: can't use file and callback at the same time
+ -read_size_hint - bytes of content to read in at a time to pass to callback
+ Note : Caching and parameter checking are set
+
+=head2 delay
+
+ Title : delay
+ Usage : $secs = $self->delay([$secs])
+ Function: get/set number of seconds to delay between fetches
+ Returns : number of seconds to delay
+ Args : new value
+
+NOTE: the default is to use the value specified by delay_policy().
+This can be overridden by calling this method.
+
+=head1 LWP::UserAgent related methods
+
+=head2 proxy
+
+ Title : proxy
+ Usage : $httpproxy = $db->proxy('http') or
+ $db->proxy(['http','ftp'], 'http://myproxy' )
+ Function: Get/Set a proxy for use of proxy
+ Returns : a string indicating the proxy
+ Args : $protocol : an array ref of the protocol(s) to set/get
+ $proxyurl : url of the proxy to use for the specified protocol
+ $username : username (if proxy requires authentication)
+ $password : password (if proxy requires authentication)
+
+=head2 authentication
+
+ Title : authentication
+ Usage : $db->authentication($user,$pass)
+ Function: Get/Set authentication credentials
+ Returns : Array of user/pass
+ Args : Array or user/pass
+
+=head2 delay_policy
+
+ Title : delay_policy
+ Usage : $secs = $self->delay_policy
+ Function: return number of seconds to delay between calls to remote db
+ Returns : number of seconds to delay
+ Args : none
+
+ NOTE: NCBI asks for no more than 3 requests per second (a 1/3 second delay
+ between requests). This method implements that policy. This will likely
+ change to check time of day or number of requests for lengthening delays.
+
+=head2 get_Parser
+
+ Title : get_Parser
+ Usage : $agent->get_Parser;
+ Function: Retrieve the parser used for last agent request
+ Returns : The Bio::Tools::EUtilities parser used to parse the HTTP::Response
+ content
+ Args : None
+ Note : Abstract method; defined by implementation
+
+=head1 Bio::Tools::EUtilities::EUtilParameters-delegating methods
+
+This is only a subset of parameters available from Bio::Tools::EUtilities::EUtilParameters (the
+ones deemed absolutely necessary). All others are available by calling
+'parameter_base-E<gt>method' when needed.
+
+=head2 set_parameters
+
+ Title : set_parameters
+ Usage : $pobj->set_parameters(@params);
+ Function: sets the NCBI parameters listed in the hash or array
+ Returns : None
+ Args : [optional] hash or array of parameter/values.
+ Note : This sets any parameter (i.e. doesn't screen them). In addition to
+ regular eutil-specific parameters, you can set the following:
+
+ -eutil - the eUtil to be used (default 'efetch')
+ -history - pass a HistoryI-implementing object, which
+ sets the WebEnv, query_key, and possibly db and linkname
+ (the latter two only for LinkSets)
+ -correspondence - Boolean flag, set to TRUE or FALSE; indicates how
+ IDs are to be added together for elink request where
+ ID correspondence might be needed
+ (default 0)
+
+=head2 reset_parameters
+
+ Title : reset_parameters
+ Usage : resets values
+ Function: resets parameters to either undef or value in passed hash
+ Returns : none
+ Args : [optional] hash of parameter-value pairs
+ Note : this also resets eutil(), correspondence(), and the history and request
+ cache
+
+=head2 available_parameters
+
+ Title : available_parameters
+ Usage : @params = $pobj->available_parameters()
+ Function: Returns a list of the available parameters
+ Returns : Array of available parameters (no values)
+ Args : [optional] A string; either eutil name (for returning eutil-specific
+ parameters) or 'history' (for those parameters allowed when retrieving
+ data stored on the remote server using a 'History' object).
+
+=head2 get_parameters
+
+ Title : get_parameters
+ Usage : @params = $pobj->get_parameters;
+ %params = $pobj->get_parameters;
+ Function: Returns list of key/value pairs, parameter => value
+ Returns : Flattened list of key-value pairs. All key-value pairs returned,
+ though subsets can be returned based on the '-type' parameter.
+ Data passed as an array ref are returned based on whether the
+ '-join_ids' flag is set (default is the same array ref).
+ Args : -type : the eutil name or 'history', for returning a subset of
+ parameters (Default: returns all)
+ -join_ids : Boolean; join IDs based on correspondence (Default: no join)
+
+=head2 get_parameter_values
+
+ Title : get_parameter_values
+ Usage : @vals = $factory->get_parameter_values('id'); # always get array
+ Function: Returns the specific parameter values.
+ Returns : For consistency returns a list of values for this parameter. If only
+ one is expected, use:
+
+ ($val) = $factory->get_parameter_values('id');
+
+ Args : parameter expected
+
+=head1 Bio::Tools::EUtilities-delegating methods
+
+=head1 Bio::Tools::EUtilities::EUtilDataI methods
+
+=head2 eutil
+
+ Title : eutil
+ Usage : $eutil->$foo->eutil
+ Function : Get/Set eutil
+ Returns : string
+ Args : string (eutil)
+ Throws : on invalid eutil
+
+=head2 datatype
+
+ Title : datatype
+ Usage : $type = $foo->datatype;
+ Function : Get/Set data object type
+ Returns : string
+ Args : string
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : none
+ Args : (optional) simple data for text formatting
+ Note : Implemented in plugins
+
+=head2 print_all
+
+ Title : print_all
+ Usage : $info->print_all();
+ $info->print_all(-fh => $fh, -cb => $coderef);
+ Function : prints (dumps) all data in parser. Unless a coderef is supplied,
+ this just dumps the parser-specific to_string method to either a
+ file/fh or STDOUT
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method. This is passed
+ in a LinkSet object
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Notes : only applicable for einfo. If -file or -fh are not defined,
+ prints to STDOUT
+
+=head1 Methods useful for multiple eutils
+
+=head2 get_ids
+
+ Title : get_ids
+ Usage : my @ids = $parser->get_ids
+ Function : returns array of requested IDs (see Notes for more specifics)
+ Returns : array
+ Args : [conditional] not required except when running elink queries against
+ multiple databases. In case of the latter, the database name is
+ optional but recommended when retrieving IDs as the ID list will
+ be globbed together. In such cases, if a db name isn't provided a
+ warning is issued as a reminder.
+ Notes : esearch : returned ID list
+ elink : returned ID list (see Args above for caveats)
+ all others : from parameter_base->id or undef
+
+=head2 get_database
+
+ Title : get_database
+ Usage : my $db = $info->get_database;
+ Function : returns single database name (eutil-compatible). This is the queried
+ database. For most eutils this is straightforward. For elinks
+ (which have 'db' and 'dbfrom') this is dbto, for egquery, it is the first
+ db in the list (you probably want get_databases instead)
+ Returns : string
+ Args : none
+ Notes : egquery : first db in the query (you probably want get_databases)
+ einfo : the queried database
+ espell : the queried database
+ elink : from parameter_base->dbfrom or undef
+ all others : from parameter_base->db or undef
+
+=head2 get_db (alias for get_database)
+
+=head2 get_databases
+
+ Title : get_databases
+ Usage : my @dbs = $parser->get_databases
+ Function : returns list of databases
+ Returns : array of strings
+ Args : none
+ Notes : This is guaranteed to return a list of databases. For a single
+ database use the convenience method get_db/get_database
+
+ egquery : list of all databases in the query
+ einfo : the queried database
+ espell : the queried database
+ all others : from parameter_base->db or undef
+
+=head2 get_dbs (alias for get_databases)
+
+=head2 next_History
+
+ Title : next_History
+ Usage : while (my $hist=$parser->next_History) {...}
+ Function : returns next HistoryI (if present).
+ Returns : Bio::Tools::EUtilities::HistoryI (Cookie or LinkSet)
+ Args : none
+ Note : esearch, epost, and elink are all capable of returning data which
+ indicates that search results (in the form of UIDs) are stored on the
+ remote server. Access to this data is wrapped up in a simple interface
+ (HistoryI), which is implemented in two classes:
+ Bio::Tools::EUtilities::History (the simplest) and
+ Bio::Tools::EUtilities::Link::LinkSet. In general, calls to epost and esearch
+ will only return a single HistoryI object (formerly known as a
+ Cookie), but calls to elink can generate many depending on the
+ number of IDs, the correspondence, etc. Hence this iterator, which
+ allows one to retrieve said data one piece at a time.
+
+=head2 next_cookie (alias for next_History)
+
+=head2 get_Histories
+
+ Title : get_Histories
+ Usage : my @hists = $parser->get_Histories
+ Function : returns list of HistoryI objects.
+ Returns : list of Bio::Tools::EUtilities::HistoryI (Cookie or LinkSet)
+ Args : none
+
+=head1 Query-related methods
+
+=head2 get_count
+
+ Title : get_count
+ Usage : my $ct = $parser->get_count
+ Function : returns the count (hits for a search)
+ Returns : integer
+ Args : [CONDITIONAL] string with database name - used to retrieve
+ count from specific database when using egquery
+ Notes : egquery : count for specified database (specified above)
+ esearch : count for last search
+ all others : undef
+
+=head2 get_term
+
+ Title : get_term
+ Usage : $st = $qd->get_term;
+ Function : retrieve the term for the global search
+ Returns : string
+ Args : none
+ Notes : egquery : search term
+ espell : search term
+ esearch : from parameter_base->term or undef
+ all others : undef
+
+=head2 get_translation_from
+
+ Title : get_translation_from
+ Usage : $string = $qd->get_translation_from();
+ Function: portion of the original query replaced with translated_to()
+ Returns : string
+ Args : none
+ Note : only applicable for esearch
+
+=head2 get_translation_to
+
+ Title : get_translation_to
+ Usage : $string = $qd->get_translation_to();
+ Function: replaced string used in place of the original query term in translation_from()
+ Returns : string
+ Args : none
+ Note : only applicable for esearch
+
+=head2 get_retstart
+
+ Title : get_retstart
+ Usage : $start = $qd->get_retstart();
+ Function : retstart setting for the query (either set or NCBI default)
+ Returns : Integer
+ Args : none
+ Notes : esearch : retstart
+ esummary : retstart
+ all others : from parameter_base->retstart or undef
+
+=head2 get_retmax
+
+ Title : get_retmax
+ Usage : $max = $qd->get_retmax();
+ Function : retmax setting for the query (either set or NCBI default)
+ Returns : Integer
+ Args : none
+ Notes : esearch : retmax
+ esummary : retmax
+ all others : from parameter_base->retmax or undef
+
+=head2 get_query_translation
+
+ Title : get_query_translation
+ Usage : $string = $qd->get_query_translation();
+ Function: returns the translated query used for the search (if any)
+ Returns : string
+ Args : none
+ Notes : only applicable for esearch. This is the actual term used for
+ esearch.
+
+=head2 get_corrected_query
+
+ Title : get_corrected_query
+ Usage : my $cor = $eutil->get_corrected_query;
+ Function : retrieves the corrected query when using espell
+ Returns : string
+ Args : none
+ Notes : only applicable for espell.
+
+=head2 get_replaced_terms
+
+ Title : get_replaced_terms
+ Usage : my $term = $eutil->get_replaced_terms
+ Function : returns array of strings replaced in the query
+ Returns : array of strings
+ Args : none
+ Notes : only applicable for espell
+
+=head2 next_GlobalQuery
+
+ Title : next_GlobalQuery
+ Usage : while (my $query = $eutil->next_GlobalQuery) {...}
+ Function : iterates through the queries returned from an egquery search
+ Returns : GlobalQuery object
+ Args : none
+ Notes : only applicable for egquery
+
+=head2 get_GlobalQueries
+
+ Title : get_GlobalQueries
+ Usage : @queries = $eutil->get_GlobalQueries
+ Function : returns list of GlobalQuery objects
+ Returns : array of GlobalQuery objects
+ Args : none
+ Notes : only applicable for egquery
+
+=head2 print_GlobalQueries
+
+ Title : print_GlobalQueries
+ Usage : $docsum->print_GlobalQueries();
+ $docsum->print_GlobalQueries(-fh => $fh, -cb => $coderef);
+ Function : prints item data for all global queries. The default printing
+ method is each item per DocSum is printed with relevant values if
+ present in a simple table using Text::Wrap.
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method. This is passed
+ in a GlobalQuery object;
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Notes : only applicable for egquery. If -file or -fh are not defined,
+ prints to STDOUT
+
+=head1 Summary-related methods
+
+=head2 next_DocSum
+
+ Title : next_DocSum
+ Usage : while (my $ds = $esum->next_DocSum) {...}
+ Function : iterate through DocSum instances
+ Returns : single Bio::Tools::EUtilities::Summary::DocSum
+ Args : none yet
+ Notes : only applicable for esummary
+
+=head2 get_DocSums
+
+ Title : get_DocSums
+ Usage : my @docsums = $esum->get_DocSums
+ Function : retrieve a list of DocSum instances
+ Returns : array of Bio::Tools::EUtilities::Summary::DocSum
+ Args : none
+ Notes : only applicable for esummary
+
+=head2 print_DocSums
+
+ Title : print_DocSums
+ Usage : $docsum->print_DocSums();
+ $docsum->print_DocSums(-fh => $fh, -cb => $coderef);
+ Function : prints item data for all docsums. The default printing method is
+ each item per DocSum is printed with relevant values if present
+ in a simple table using Text::Wrap.
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method. This is passed
+ in a DocSum object;
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Notes : only applicable for esummary. If -file or -fh are not defined,
+ prints to STDOUT
+
+=head1 Info-related methods
+
+=head2 get_available_databases
+
+ Title : get_available_databases
+ Usage : my @dbs = $info->get_available_databases
+ Function : returns list of available eutil-compatible database names
+ Returns : Array of strings
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 get_record_count
+
+ Title : get_record_count
+ Usage : my $ct = $eutil->get_record_count;
+ Function : returns database record count
+ Returns : integer
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 get_last_update
+
+ Title : get_last_update
+ Usage : my $time = $info->get_last_update;
+ Function : returns string containing time/date stamp for last database update
+ Returns : string
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 get_menu_name
+
+ Title : get_menu_name
+ Usage : my $nm = $info->get_menu_name;
+ Function : returns string of database menu name
+ Returns : string
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 get_description
+
+ Title : get_description
+ Usage : my $desc = $info->get_description;
+ Function : returns database description
+ Returns : string
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 next_FieldInfo
+
+ Title : next_FieldInfo
+ Usage : while (my $field = $info->next_FieldInfo) {...}
+ Function : iterate through FieldInfo objects
+ Returns : Field object
+ Args : none
+ Notes : only applicable for einfo. Uses callback() for filtering if defined
+ for 'fields'
+
+=head2 get_FieldInfo
+
+ Title : get_FieldInfo
+ Usage : my @fields = $info->get_FieldInfo;
+ Function : returns list of FieldInfo objects
+ Returns : array (FieldInfo objects)
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 next_LinkInfo
+
+ Title : next_LinkInfo
+ Usage : while (my $link = $info->next_LinkInfo) {...}
+ Function : iterate through LinkInfo objects
+ Returns : LinkInfo object
+ Args : none
+ Notes : only applicable for einfo. Uses callback() for filtering if defined
+ for 'linkinfo'
+
+=head2 get_LinkInfo
+
+ Title : get_LinkInfo
+ Usage : my @links = $info->get_LinkInfo;
+ Function : returns list of LinkInfo objects
+ Returns : array (LinkInfo objects)
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 print_FieldInfo
+
+ Title : print_FieldInfo
+ Usage : $info->print_FieldInfo();
+ $info->print_FieldInfo(-fh => $fh, -cb => $coderef);
+ Function : prints field data for each FieldInfo object. The default method
+ prints data from each FieldInfo in a simple table using Text::Wrap.
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method.
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Note : if -file or -fh are not defined, prints to STDOUT
+
+=head2 print_LinkInfo
+
+ Title : print_LinkInfo
+ Usage : $info->print_LinkInfo();
+ $info->print_LinkInfo(-fh => $fh, -cb => $coderef);
+ Function : prints link data for each LinkInfo object. The default is generated
+ via LinkInfo::to_string
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method. This is passed
+ in a LinkInfo object;
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Notes : only applicable for einfo. If -file or -fh are not defined,
+ prints to STDOUT
+
+=head1 Bio::Tools::EUtilities::Link-related methods
+
+=head2 next_LinkSet
+
+ Title : next_LinkSet
+ Usage : while (my $ls = $eutil->next_LinkSet) {...}
+ Function : iterate through LinkSet objects
+ Returns : LinkSet object
+ Args : none
+ Notes : only applicable for elink. Uses callback() for filtering if defined
+ for 'linksets'
+
+=head2 get_LinkSets
+
+ Title : get_LinkSets
+ Usage : my @links = $info->get_LinkSets;
+ Function : returns list of LinkSets objects
+ Returns : array (LinkSet objects)
+ Args : none
+ Notes : only applicable for elink.
+
+=head2 print_LinkSets
+
+ Title : print_LinkSets
+ Usage : $info->print_LinkSets();
+ $info->print_LinkSets(-fh => $fh, -cb => $coderef);
+ Function : prints link data for each LinkSet object. The default is generated
+ via LinkSet::to_string
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method. This is passed
+ in a LinkSet object
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Notes : only applicable for elink. If -file or -fh are not defined,
+ prints to STDOUT
+
+=head2 get_linked_databases
+
+ Title : get_linked_databases
+ Usage : my @dbs = $eutil->get_linked_databases
+ Function : returns list of databases linked to in linksets
+ Returns : array of databases
+ Args : none
+ Notes : only applicable for elink.
+
+=head1 Iterator- and callback-related methods
+
+=head2 rewind
+
+ Title : rewind
+ Usage : $esum->rewind()
+ $esum->rewind('recursive')
+ Function : resets the requested iterator(s) so that iteration restarts
+ from the first object
+ Returns : none
+ Args : [optional] Scalar; string ('all') to reset all iterators, or string
+ describing the specific main object iterator to reset. The following
+ are recognized (case-insensitive):
+
+ 'all' - rewinds all objects and also recursively resets nested object
+ iterators (such as LinkSets and DocSums).
+ 'globalqueries'
+ 'fieldinfo' or 'fieldinfos'
+ 'linkinfo' or 'linkinfos'
+ 'linksets'
+ 'docsums'
+
+=head2 generate_iterator
+
+ Title : generate_iterator
+ Usage : my $coderef = $esum->generate_iterator('linkinfo')
+ Function : generates an iterator (code reference) which iterates through
+ the relevant object indicated by the args
+ Returns : code reference
+ Args : [REQUIRED] Scalar; string describing the specific object to iterate.
+ The following are currently recognized (case-insensitive):
+
+ 'globalqueries'
+ 'fieldinfo' or 'fieldinfos'
+ 'linkinfo' or 'linkinfos'
+ 'linksets'
+ 'docsums'
+
+ A second argument can also be passed to generate a 'lazy' iterator,
+ which loops through and returns objects as they are created (instead
+ of creating all data instances up front, then iterating through,
+ which is the default). Use of these iterators precludes use of
+ rewind() for the time being as we can't guarantee you can rewind(),
+ as this depends on whether the data source is seek()able and thus
+ 'rewindable'. We will add rewind() support at a later time which
+ will work for 'seekable' data.
+
+ A callback specified using callback() will be used to filter objects
+ for any generated iterator. This behaviour is implemented for both
+ normal and lazy iterator types and is the default. If you don't want
+ this, make sure to reset any previously set callbacks via
+ reset_callback() (which just deletes the code ref).
+ TODO : generate seekable iterators ala HOP for seekable fh data
+
+=head2 callback
+
+ Title : callback
+ Usage : $parser->callback(sub {$_[0]->get_database eq 'protein'});
+ Function : Get/set callback code ref used to filter returned data objects
+ Returns : code ref if previously set
+ Args : single argument:
+ code ref - evaluates a passed object and returns true or false value
+ (used in iterators)
+ 'reset' - string, resets (deletes) the callback.
+ returns upon any other args
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities.pm b/lib/Bio/Tools/EUtilities.pm
new file mode 100644
index 0000000..74d7f3f
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities.pm
@@ -0,0 +1,1544 @@
+package Bio::Tools::EUtilities;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Root::IO Bio::Tools::EUtilities::EUtilDataI);
+use XML::Simple;
+
+# ABSTRACT: NCBI eutil XML parsers.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+
+{
+
+my %DATA_MODULE = ( # eutil name => suffix of the Bio::Tools::EUtilities::* data class that new() loads
+ 'esearch' => 'Query',
+ 'egquery' => 'Query',
+ 'espell' => 'Query',
+ 'epost' => 'Query',
+ 'elink' => 'Link',
+ 'einfo' => 'Info',
+ 'esummary' => 'Summary',
+ );
+
+# Constructor and factory.
+#
+# When invoked on a Bio::Tools::EUtilities::* subclass, builds the object via
+# the parent constructor and then runs _initialize() on it.
+#
+# When invoked on the base class, looks up the data module for the required
+# -eutil argument in %DATA_MODULE, loads it, and delegates to that module's
+# new(), prepending -datatype and -eutil to the caller's arguments.
+#
+# Returns : the new parser object, or nothing if the data module failed to load
+# Throws  : if no -eutil argument is given to the base-class call
+sub new {
+    # NOTE: the argument list must be '@args'; the mail archive had mangled it
+    # into ' at args', which does not compile.
+    my ($caller, @args) = @_;
+    my $class = ref $caller || $caller;
+    if ($class =~ m{Bio::Tools::EUtilities::(\S+)}) {
+        my ($self) = $class->SUPER::new(@args);
+        $self->_initialize(@args);
+        return $self;
+    } else {
+        my %param = @args;
+        @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
+        my $eutil = $param{'-eutil'} || $class->throw("Need eutil to make instance");
+        return unless( $class->_load_eutil_module( $DATA_MODULE{$eutil}) );
+        return "Bio::Tools::EUtilities::$DATA_MODULE{$eutil}"->new(
+            -datatype => lc $DATA_MODULE{$eutil},
+            -eutil    => $eutil,
+            @args);
+    }
+}
+
+# Populate a freshly constructed parser from the named constructor arguments
+# (-response, -parameters, -datatype, -eutil, -cache_response, -lazy).
+sub _initialize {
+    my ($self, @args) = @_;
+    my @wanted = qw(RESPONSE PARAMETERS DATATYPE EUTIL CACHE_RESPONSE LAZY);
+    my ($response, $pobj, $type, $eutil, $cache, $lazy) =
+        $self->_rearrange(\@wanted, @args);
+
+    $self->datatype($type);
+    $self->eutil($eutil);
+    $self->response($response)   if $response;
+    $self->parameter_base($pobj) if $pobj;
+    $self->cache_response($cache || 0);
+
+    # Lazy parsing is only implemented for elink and esummary (where returned
+    # data can be quite long). There is also no point in parsing lazily when
+    # the data already sits in memory in an HTTP::Response object.
+    my $lazy_allowed = !$response && ($eutil eq 'elink' || $eutil eq 'esummary');
+
+    # setting parser to 'lazy' mode is permanent (can't reset later)
+    $self->{'_lazy'}   = $lazy_allowed ? ($lazy || 0) : 0;
+    $self->{'_parsed'} = 0;
+}
+
+}
+
+
+# Get/set the flag that says whether the response object is kept after
+# parsing. Any defined argument is normalised to 1 or 0 before being stored.
+sub cache_response {
+    my ($self, $cache) = @_;
+    $self->{'_cache_response'} = ($cache ? 1 : 0) if defined $cache;
+    return $self->{'_cache_response'};
+}
+
+
+# Get/set the HTTP::Response object to parse.
+# Throws if a true argument is supplied that is not an HTTP::Response.
+sub response {
+    my ($self, $response) = @_;
+    return $self->{'_response'} if !$response;
+    if (!(ref $response && $response->isa('HTTP::Response'))) {
+        $self->throw('Not an HTTP::Response object');
+    }
+    $self->{'_response'} = $response;
+    return $self->{'_response'};
+}
+
+
+# Get/set the parameter object.
+# Throws unless a supplied object is a Bio::ParameterBaseI, and warns when it
+# is not specifically a Bio::Tools::EUtilities::EUtilParameters.
+sub parameter_base {
+    my ($self, $pb) = @_;
+    return $self->{'_parameter_base'} if !$pb;
+
+    my $is_base   = ref $pb && $pb->isa('Bio::ParameterBaseI');
+    $self->throw('Not an Bio::ParameterBaseI object') if !$is_base;
+
+    my $is_eutils = ref $pb && $pb->isa('Bio::Tools::EUtilities::EUtilParameters');
+    $self->warn('Not an Bio::Tools::EUtilities::EUtilParameters object; may experience some turbulence...') if !$is_eutils;
+
+    $self->{'_parameter_base'} = $pb;
+    return $self->{'_parameter_base'};
+}
+
+
+# Returns the '_parsed' flag (set to 1 once parsing has completed).
+sub data_parsed {
+    my ($self) = @_;
+    return $self->{'_parsed'};
+}
+
+
+# Returns the '_lazy' flag that _initialize() fixed at construction time.
+sub is_lazy {
+    my ($self) = @_;
+    return $self->{'_lazy'};
+}
+
+
+{
+my %EUTIL_DATA = ( # eutil name => element names passed to XML::Simple as 'forcearray'
+ 'esummary' => [qw(DocSum Item)],
+ 'epost' => [],
+ 'egquery' => [],
+ 'einfo' => [qw(Field Link)],
+ 'elink' => [qw(LinkSet LinkSetDb LinkSetDbHistory IdUrlSet
+ Id IdLinkSet ObjUrl Link LinkInfo)],
+ 'espell' => [qw(Original Replaced)],
+ 'esearch' => [qw(Id ErrorList WarningList)],
+ );
+
+# Parse the complete XML document in one go and hand the resulting data
+# structure to _add_data().
+#
+# The input is the stored response if there is one, otherwise the object's
+# filehandle. espell data is first passed through _fix_espell().
+#
+# Returns : result of _add_data(), or nothing if the data carries an
+#           InvalidIdList element
+# Throws  : if there is neither a response nor a stream, or if the data
+#           carries a plain-string ERROR element
+sub parse_data {
+    my $self  = shift;
+    my $eutil = $self->eutil;
+    my $xs    = XML::Simple->new();
+    my $response = $self->response ? $self->response :
+                   $self->_fh      ? $self->_fh      :
+                   $self->throw('No response or stream specified');
+    my $simple = ($eutil eq 'espell') ?
+        $xs->XMLin($self->_fix_espell($response), forcearray => $EUTIL_DATA{$eutil}) :
+        ($response && $response->isa("HTTP::Response")) ?
+        $xs->XMLin($response->content, forcearray => $EUTIL_DATA{$eutil}) :
+        $xs->XMLin($response, forcearray => $EUTIL_DATA{$eutil});
+    # check for errors
+    if ($simple->{ERROR}) {
+        my $error = $simple->{ERROR};
+        $self->throw("NCBI $eutil fatal error: ".$error) unless ref $error;
+    }
+    if ($simple->{InvalidIdList}) {
+        $self->warn("NCBI $eutil error: Invalid ID List".$simple->{InvalidIdList});
+        return;
+    }
+    if ($simple->{ErrorList} || $simple->{WarningList}) {
+        # Plain ternaries instead of 'my @x = ... if COND': the behaviour of a
+        # conditionally executed 'my' is undefined in Perl.
+        my @errorlist   = $simple->{ErrorList}   ? @{ $simple->{ErrorList} }   : ();
+        my @warninglist = $simple->{WarningList} ? @{ $simple->{WarningList} } : ();
+        # Start from an empty string so empty lists do not trigger
+        # "uninitialized value" warnings below.
+        my $err_warn = '';
+        for my $error (@errorlist) {
+            my $messages = join("\n\t", map  { "$_ [".$error->{$_}.']' }
+                                        grep { !ref $error->{$_} } keys %$error);
+            $err_warn .= "Error : $messages";
+        }
+        for my $warn (@warninglist) {
+            my $messages = join("\n\t", map  { "$_ [".$warn->{$_}.']' }
+                                        grep { !ref $warn->{$_} } keys %$warn);
+            $err_warn .= "Warnings : $messages";
+        }
+        chomp($err_warn);
+        # don't return as some data may still be useful
+        $self->warn("NCBI $eutil Errors/Warnings:\n".$err_warn);
+    }
+    delete $self->{'_response'} unless $self->cache_response;
+    $self->{'_parsed'} = 1;
+    $self->_add_data($simple);
+}
+
+# implemented only for elink/esummary, still experimental
+
+# Read the next <LinkSet> (elink) or <DocSum> (esummary) element from the
+# object's filehandle, parse just that chunk, and pass it to _add_data().
+# When the stream ends before another complete element is read, the object
+# is marked as parsed and nothing is returned.
+#
+# Throws : for any other eutil, or when the data is held in a response object
+#          (lazy parsing of responses is not implemented)
+sub parse_chunk {
+    my $self  = shift;
+    my $eutil = $self->eutil;
+    my $tag   = $eutil eq 'elink'    ? 'LinkSet' :
+                $eutil eq 'esummary' ? 'DocSum'  :
+                $self->throw("Only eutil elink/esummary use parse_chunk()");
+    my $xs = XML::Simple->new();
+    if ($self->response) {
+        # The statement that used to follow this throw could never run.
+        $self->throw("Lazy parsing not implemented for HTTP::Response data yet");
+    } else { # has to be a file/filehandle
+        my $fh = $self->_fh;
+        my ($chunk, $seendoc, $line);
+        while ($line = <$fh>) {
+            # skip everything up to the opening tag of the next element
+            next unless $seendoc || $line =~ m{^<$tag>};
+            $seendoc = 1;
+            $chunk .= $line;
+            last if $line =~ m{^</$tag>};
+        }
+        # $line is undefined only when the loop ran off the end of the stream
+        if (!defined $line) {
+            $self->{'_parsed'} = 1;
+            return;
+        }
+        $self->_add_data(
+            $xs->XMLin($chunk, forcearray => $EUTIL_DATA{$eutil}, KeepRoot => 1)
+        );
+    }
+}
+
+}
+
+
+# Returns a one-line "EUtil : <name>" header followed by a blank line,
+# parsing the data first if that has not happened yet.
+sub to_string {
+    my ($self) = @_;
+    if ($self->can('parse_data') && !$self->data_parsed) {
+        $self->parse_data;
+    }
+    my $name = $self->eutil;
+    return sprintf("%-20s:%s\n\n", 'EUtil', $name);
+}
+
+
+# Forwards all arguments unchanged to _print_handler().
+sub print_all {
+    my $self = shift;
+    $self->_print_handler(@_);
+}
+
+
+
+
+# Returns the list of IDs held by this parser.
+#
+#   esearch    : contents of the '_id' slot
+#   elink      : IDs collected from the LinkSets; $request may be a code ref
+#                used to select LinkSets, or a database name to match
+#   esummary   : IDs of the DocSums (cached in the '_id' slot on first call)
+#   all others : IDs from the parameter object, if one is set
+#
+# Not available in lazy mode (warns and returns nothing).
+sub get_ids {
+    my ($self, $request) = @_;
+    my $eutil = $self->eutil;
+    if ($self->is_lazy) {
+        $self->warn('get_ids() not implemented when using lazy mode');
+        return;
+    }
+    $self->parse_data unless $self->data_parsed;
+
+    if ($eutil eq 'esearch') {
+        return $self->{'_id'} ? @{ $self->{'_id'} } : ();
+    }
+
+    if ($eutil eq 'elink') {
+        my @ids;
+        if (!$request) {
+            $self->warn('Multiple database present, IDs will be globbed together')
+                if $self->get_linked_databases > 1;
+            @ids = map { $_->get_ids } $self->get_LinkSets;
+        } elsif (ref $request eq 'CODE') {
+            @ids = map { $_->get_ids } grep { $request->($_) } $self->get_LinkSets;
+        } else {
+            # pair each LinkSet's IDs with its databases, then keep the IDs of
+            # every LinkSet that lists the requested database
+            my @pairs = map { [ [ $_->get_ids ], [ $_->get_databases ] ] }
+                        $self->get_LinkSets;
+            for my $pair (@pairs) {
+                my ($set_ids, $set_dbs) = @{$pair};
+                push @ids, @{$set_ids} if grep { $_ eq $request } @{$set_dbs};
+            }
+        }
+        return @ids;
+    }
+
+    if ($eutil eq 'esummary') {
+        if (!exists $self->{'_id'}) {
+            push @{ $self->{'_id'} }, map { $_->get_id } $self->get_DocSums;
+        }
+        return @{ $self->{'_id'} };
+    }
+
+    if (my $pb = $self->parameter_base) {
+        my $ids = $pb->id;
+        return $ids ? @{$ids} : ();
+    }
+    return ();
+}
+
+
+# Returns the first entry of get_databases().
+sub get_database {
+    my ($self) = @_;
+    # a list slice is used on purpose: it yields an empty list when
+    # get_databases() returns nothing
+    return ( $self->get_databases )[0];
+}
+
+
+# Alias for get_database().
+sub get_db {
+    my ($self) = @_;
+    return $self->get_database;
+}
+
+
+# Returns the list of databases relevant to this parser.
+#
+#   einfo/espell : the '_dbname' or '_database' slot, else the available
+#                  databases
+#   egquery      : database of every GlobalQuery
+#   elink        : sorted, de-duplicated databases of all LinkSets
+#   all others   : dbfrom (elink parameters) or db of the parameter object
+sub get_databases {
+    my ($self, $db) = @_;
+    $self->parse_data unless $self->data_parsed;
+    my $eutil = $self->eutil;
+    my @dbs;
+    if ($eutil eq 'einfo' || $eutil eq 'espell') {
+        @dbs = $self->{'_dbname'} ||
+               $self->{'_database'} ||
+               $self->get_available_databases;
+    } elsif ($eutil eq 'egquery') {
+        @dbs = map { $_->get_database } ($self->get_GlobalQueries);
+    } elsif ($eutil eq 'elink') {
+        # only unique dbs
+        my %seen;
+        my @all = map { ($_->get_databases) } $self->get_LinkSets;
+        @dbs = sort grep { !$seen{$_}++ } @all;
+    } elsif (my $pb = $self->parameter_base) {
+        @dbs = $pb->eutil eq 'elink' ? $pb->dbfrom : $pb->db;
+    }
+    return @dbs;
+}
+
+
+# Alias for get_databases().
+sub get_dbs {
+    my ($self) = @_;
+    return $self->get_databases;
+}
+
+
+# Returns the next object from the 'histories' iterator, creating the
+# iterator on first use.
+sub next_History {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed;
+    unless (exists $self->{'_histories_it'}) {
+        $self->{'_histories_it'} = $self->generate_iterator('histories');
+    }
+    my $next = $self->{'_histories_it'}->();
+    return $next;
+}
+
+
+# Alias for next_History().
+sub next_cookie {
+    my ($self) = @_;
+    return $self->next_History;
+}
+
+
+# Returns the contents of the '_histories' slot as a list (empty if unset).
+sub get_Histories {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed;
+    return @{ $self->{'_histories'} } if ref $self->{'_histories'};
+    return ();
+}
+
+
+# Returns the hit count.
+#
+# For 'multidbquery' data (egquery) a database name is required and the count
+# of the matching GlobalQuery is returned; a missing or unknown database
+# produces a warning and no value. Otherwise the '_count' slot is returned,
+# falling back to the number of IDs.
+sub get_count {
+    my ($self, $db) = @_;
+    $self->parse_data unless $self->data_parsed;
+
+    if ($self->datatype ne 'multidbquery') {
+        return $self->{'_count'} || scalar($self->get_ids);
+    }
+
+    # egquery
+    if (!$db) {
+        $self->warn('Must specify database to get count from');
+        return;
+    }
+    my ($gq) = grep { $_->get_database eq $db } $self->get_GlobalQueries;
+    return $gq->get_count if $gq;
+    $self->warn("Unknown database $db");
+    return;
+}
+
+
+# Returns the search term: the '_term' slot, else the '_query' slot, else the
+# term of the parameter object; nothing if none of these is available.
+sub get_term {
+    my ($self, @args) = @_;
+    $self->parse_data unless $self->data_parsed;
+    return $self->{'_term'}  if $self->{'_term'};
+    return $self->{'_query'} if $self->{'_query'};
+    return $self->parameter_base->term if $self->parameter_base;
+    return;
+}
+
+
+# Returns the 'From' entry of the '_translation' slot, parsing first if needed.
+sub get_translation_from {
+    my ($self) = @_;
+    if (!$self->data_parsed) {
+        $self->parse_data;
+    }
+    return $self->{'_translation'}->{'From'};
+}
+
+
+# Returns the 'To' entry of the '_translation' slot, parsing first if needed.
+sub get_translation_to {
+    my ($self) = @_;
+    if (!$self->data_parsed) {
+        $self->parse_data;
+    }
+    return $self->{'_translation'}->{'To'};
+}
+
+
+# Returns the '_retstart' slot, parsing first if needed.
+sub get_retstart {
+    my ($self) = @_;
+    if (!$self->data_parsed) {
+        $self->parse_data;
+    }
+    return $self->{'_retstart'};
+}
+
+
+# Returns the '_retmax' slot, parsing first if needed.
+sub get_retmax {
+    my ($self) = @_;
+    if (!$self->data_parsed) {
+        $self->parse_data;
+    }
+    return $self->{'_retmax'};
+}
+
+
+# Returns the '_querytranslation' slot, parsing first if needed.
+sub get_query_translation {
+    my ($self) = @_;
+    if (!$self->data_parsed) {
+        $self->parse_data;
+    }
+    return $self->{'_querytranslation'};
+}
+
+
+# Returns the '_correctedquery' slot, parsing first if needed.
+sub get_corrected_query {
+    my ($self) = @_;
+    if (!$self->data_parsed) {
+        $self->parse_data;
+    }
+    return $self->{'_correctedquery'};
+}
+
+
+# Returns the list stored under 'Replaced' in the '_spelledquery' slot.
+#
+# Always returns an empty list when there is no such data. (Previously the
+# sub fell off the end of its 'if' in that case and returned the value of
+# the failed condition, i.e. a one-element list holding a false value when
+# called in list context.)
+sub get_replaced_terms {
+    my $self = shift;
+    $self->parse_data unless $self->data_parsed;
+    my $spelled = $self->{'_spelledquery'};
+    return () unless $spelled && ref $spelled->{Replaced};
+    return @{ $spelled->{Replaced} };
+}
+
+
+# Returns the next object from the 'globalqueries' iterator, creating the
+# iterator on first use.
+sub next_GlobalQuery {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed;
+    unless (exists $self->{'_globalqueries_it'}) {
+        $self->{'_globalqueries_it'} = $self->generate_iterator('globalqueries');
+    }
+    return $self->{'_globalqueries_it'}->();
+}
+
+
+# Returns the contents of the '_globalqueries' slot as a list (empty if unset).
+sub get_GlobalQueries {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed;
+    return @{ $self->{'_globalqueries'} } if ref $self->{'_globalqueries'};
+    return ();
+}
+
+
+# Prints every GlobalQuery through _print_handler() (-type => 'GlobalQuery').
+sub print_GlobalQueries {
+    my $self = shift;
+    $self->_print_handler(@_, -type => 'GlobalQuery');
+}
+
+
+# Returns the next object from the 'docsums' iterator, creating the iterator
+# on first use. The data is parsed up front only when not in lazy mode.
+sub next_DocSum {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed || $self->is_lazy;
+    unless (exists $self->{'_docsums_it'}) {
+        $self->{'_docsums_it'} = $self->generate_iterator('docsums');
+    }
+    return $self->{'_docsums_it'}->();
+}
+
+
+# Returns the contents of the '_docsums' slot as a list (empty if unset).
+# Not available in lazy mode (warns and returns an empty list).
+sub get_DocSums {
+    my ($self) = @_;
+    if ($self->is_lazy) {
+        $self->warn('get_DocSums() not implemented when using lazy mode');
+        return ();
+    }
+    $self->parse_data unless $self->data_parsed;
+    return @{ $self->{'_docsums'} } if ref $self->{'_docsums'};
+    return ();
+}
+
+
+# Prints every DocSum through _print_handler() (-type => 'DocSum').
+sub print_DocSums {
+    my $self = shift;
+    $self->_print_handler(@_, -type => 'DocSum');
+}
+
+
+# Returns the contents of the '_available_databases' slot as a list
+# (empty if unset).
+sub get_available_databases {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed;
+    if ($self->{'_available_databases'}) {
+        return @{ $self->{'_available_databases'} };
+    }
+    return ();
+}
+
+
+# Returns the '_count' slot, parsing first if needed.
+sub get_record_count {
+    my ($self) = @_;
+    if (!$self->data_parsed) {
+        $self->parse_data;
+    }
+    return $self->{'_count'};
+}
+
+
+# Returns the '_lastupdate' slot, parsing first if needed.
+sub get_last_update {
+    my ($self) = @_;
+    if (!$self->data_parsed) {
+        $self->parse_data;
+    }
+    return $self->{'_lastupdate'};
+}
+
+
+# Returns the '_menuname' slot if that key exists, else the '_menu' slot if
+# that key exists, else nothing.
+sub get_menu_name {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed;
+    for my $key ('_menuname', '_menu') {
+        return $self->{$key} if exists $self->{$key};
+    }
+    return;
+}
+
+
+# Returns the '_description' slot, parsing first if needed.
+sub get_description {
+    my ($self) = @_;
+    if (!$self->data_parsed) {
+        $self->parse_data;
+    }
+    return $self->{'_description'};
+}
+
+
+# Returns the next object from the 'fieldinfo' iterator, creating the
+# iterator on first use.
+sub next_FieldInfo {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed;
+    unless (exists $self->{'_fieldinfo_it'}) {
+        $self->{'_fieldinfo_it'} = $self->generate_iterator('fieldinfo');
+    }
+    return $self->{'_fieldinfo_it'}->();
+}
+
+
+# Returns the contents of the '_fieldinfo' slot as a list (empty if unset).
+sub get_FieldInfo {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed;
+    return @{ $self->{'_fieldinfo'} } if ref $self->{'_fieldinfo'};
+    return ();
+}
+
+# plural spelling of the same method
+*get_FieldInfos = \&get_FieldInfo;
+
+
+# Returns the next object from the 'linkinfo' iterator, creating the
+# iterator on first use.
+sub next_LinkInfo {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed;
+    unless (exists $self->{'_linkinfo_it'}) {
+        $self->{'_linkinfo_it'} = $self->generate_iterator('linkinfo');
+    }
+    return $self->{'_linkinfo_it'}->();
+}
+
+
+# Returns the contents of the '_linkinfo' slot as a list (empty if unset).
+sub get_LinkInfo {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed;
+    return @{ $self->{'_linkinfo'} } if ref $self->{'_linkinfo'};
+    return ();
+}
+
+# plural spelling of the same method
+*get_LinkInfos = \&get_LinkInfo;
+
+
+# Prints every FieldInfo through _print_handler() (-type => 'FieldInfo').
+sub print_FieldInfo {
+    my $self = shift;
+    $self->_print_handler(@_, -type => 'FieldInfo');
+}
+
+
+# Prints every LinkInfo through _print_handler() (-type => 'LinkInfo').
+sub print_LinkInfo {
+    my $self = shift;
+    $self->_print_handler(@_, -type => 'LinkInfo');
+}
+
+
+# Returns the next object from the 'linksets' iterator, creating the iterator
+# on first use. The data is parsed up front only when not in lazy mode.
+sub next_LinkSet {
+    my ($self) = @_;
+    $self->parse_data unless $self->data_parsed || $self->is_lazy;
+    unless (exists $self->{'_linksets_it'}) {
+        $self->{'_linksets_it'} = $self->generate_iterator('linksets');
+    }
+    return $self->{'_linksets_it'}->();
+}
+
+
+# add support for retrieval of data if lazy parsing is enacted
+
+# Returns the contents of the '_linksets' slot as a list (empty if unset).
+# Not available in lazy mode (warns and returns an empty list).
+sub get_LinkSets {
+    my ($self) = @_;
+    if ($self->is_lazy) {
+        $self->warn('get_LinkSets() not implemented when using lazy mode');
+        return ();
+    }
+    $self->parse_data unless $self->data_parsed;
+    return @{ $self->{'_linksets'} } if ref $self->{'_linksets'};
+    return ();
+}
+
+
+# Prints every LinkSet through _print_handler() (-type => 'LinkSet').
+sub print_LinkSets {
+    my $self = shift;
+    $self->_print_handler(@_, -type => 'LinkSet');
+}
+
+
+# Returns get_databases() for elink parsers and an empty list for all others.
+sub get_linked_databases {
+    my ($self) = @_;
+    return $self->eutil eq 'elink' ? $self->get_databases : ();
+}
+
+
+{
+ my %VALID_ITERATORS = ( # accepted iterator name => canonical name used to build the '_<name>' and '_<name>_it' slots
+ 'globalqueries' => 'globalqueries',
+ 'fieldinfo' => 'fieldinfo',
+ 'fieldinfos' => 'fieldinfo',
+ 'linkinfo' => 'linkinfo',
+ 'linkinfos' => 'linkinfo',
+ 'linksets' => 'linksets',
+ 'docsums' => 'docsums',
+ 'histories' => 'histories'
+ );
+
+
+# Reset one iterator, or all of them, so that iteration starts over.
+#
+# Args : [optional] case-insensitive iterator name from %VALID_ITERATORS, or
+#        'all' (the default). 'all' also rewinds every LinkSet and DocSum.
+# Not available in lazy mode (warns and returns).
+sub rewind {
+    my ($self, $arg) = @_;
+    if ($self->is_lazy) {
+        $self->warn('rewind() not implemented yet when running in lazy mode');
+        return;
+    }
+    # avoid an "uninitialized" warning from lc() when no argument is given
+    $arg = defined $arg ? lc $arg : '';
+    $arg ||= 'all';
+    if (exists $VALID_ITERATORS{$arg}) {
+        # Map aliases such as 'fieldinfos' to the canonical name: iterators are
+        # stored under '_fieldinfo_it', never under '_fieldinfos_it'.
+        delete $self->{'_'.$VALID_ITERATORS{$arg}.'_it'};
+    } elsif ($arg eq 'all') {
+        for my $it (values %VALID_ITERATORS) {
+            delete $self->{'_'.$it.'_it'};
+        }
+        # nested objects only need rewinding once, not once per iterator name
+        $_->rewind('all') for $self->get_LinkSets;
+        $_->rewind('all') for $self->get_DocSums;
+    }
+    return;
+}
+
+
+# Build and return a code ref that yields one object per call, and undef once
+# the objects are exhausted.
+#
+# Args   : [REQUIRED] iterator name from %VALID_ITERATORS (case-insensitive)
+# Throws : if no name is given or the name is unknown
+#
+# If a callback is set (see callback()), only objects for which it returns
+# true are yielded. In lazy mode the iterator pulls objects from parse_chunk()
+# as needed; otherwise it walks the already-parsed list.
+sub generate_iterator {
+    my ($self, $obj) = @_;
+    if (!$obj) {
+        $self->throw('Must provide object type to iterate');
+    }
+    # the documentation promises case-insensitive names, so normalise first
+    my $name = lc $obj;
+    if (!exists $VALID_ITERATORS{$name}) {
+        $self->throw("Unknown object type [$obj]");
+    }
+    my $cb = $self->callback;
+    if ($self->is_lazy) {
+        my $queue = $self->eutil eq 'esummary' ? '_docsums' : '_linksets';
+        $self->{$queue} = [];
+        return sub {
+            if (!@{$self->{$queue}}) {
+                $self->parse_chunk; # fill the queue
+            }
+            while (my $item = shift @{$self->{$queue}}) {
+                if ($cb) {
+                    ($cb->($item)) ? return $item : next;
+                } else {
+                    return $item;
+                }
+            }
+            # deliberately a plain undef value, as before
+            undef;
+        }
+    } else {
+        my $loc = '_'.$VALID_ITERATORS{$name};
+        my $index = $#{$self->{$loc}};
+        my $current = 0;
+        return sub {
+            while ($current <= $index) {
+                if ($cb) {
+                    if (my $d = $cb->($self->{$loc}->[$current])) {
+                        return $self->{$loc}->[$current++] }
+                    else {
+                        $current++;
+                        next;
+                    }
+                } else {
+                    return $self->{$loc}->[$current++]
+                }
+            }
+            # deliberately a plain undef value, as before
+            undef;
+        }
+    }
+}
+
+}
+
+
+# Get/set the filter callback used by the iterators.
+#
+# With a code ref: stores and returns it. With the string 'reset': deletes the
+# stored callback and returns nothing. Any other true argument returns nothing
+# and leaves the stored callback untouched. Without an argument: returns the
+# stored callback.
+sub callback {
+    my ($self, $cb) = @_;
+    return $self->{'_cb'} if !$cb;
+    delete $self->{'_cb'} if $cb eq 'reset';
+    return if ref $cb ne 'CODE';
+    $self->{'_cb'} = $cb;
+    return $self->{'_cb'};
+}
+
+# Object printing methods
+
+{
+    # default formatter: the object's own to_string() plus a newline
+    my $DEF_HANDLER = sub {
+        my $obj = shift;
+        return $obj->to_string."\n";
+    };
+
+    # object types the default formatter may be used with
+    my %HANDLER = (
+        'DocSum'      => 1,
+        'FieldInfo'   => 1,
+        'LinkInfo'    => 1,
+        'GlobalQuery' => 1,
+        'LinkSet'     => 1,
+        'all'         => 1,
+    );
+
+    # Shared implementation of the print_* methods.
+    #
+    # Args : -file : file to print to
+    #        -fh   : filehandle to print to (mutually exclusive with -file)
+    #        -cb   : code ref producing the string to print for each object
+    #        -type : object type to iterate over via next_<type>; 'all'
+    #                (default) passes the parser itself to the callback once
+    #        -wrap, -all : accepted but not used here
+    # Throws : on an unregistered type (default formatter only), a non-code
+    #          callback, both -file and -fh given, or an unknown iterator
+    sub _print_handler {
+        my $self = shift;
+        my ($file, $fh, $cb, $wrap, $type, $all) = $self->_rearrange([qw(FILE FH CB WRAP TYPE ALL)], @_);
+        $type ||= 'all';
+
+        # default formatting delegates to_string
+        if (!$cb) {
+            $self->throw("Type $type not registered with print handler, exiting...")
+                if !exists($HANDLER{$type});
+            $cb = $DEF_HANDLER;
+        } else {
+            $self->throw("Callback must be a code reference") if ref $cb ne 'CODE';
+        }
+
+        # Check for the conflict BEFORE merging the two: merging first made
+        # every call with only -fh look as if both had been given.
+        $self->throw("Have defined both file and filehandle; only use one!") if $file && $fh;
+        $file ||= $fh;
+        my $io = ($file) ? Bio::Root::IO->new(-input => $file, -flush => 1) :
+                 Bio::Root::IO->new(-flush => 1); # defaults to STDOUT
+
+        if ($type eq 'all') {
+            my $string = $cb->($self);
+            $io->_print($string) if $string;
+        } else {
+            # set up iterator
+            my $it = "next_$type";
+            $self->throw("Unknown iterator method $it") unless $self->can($it);
+            while (my $obj = $self->$it) {
+                my $string = $cb->($obj);
+                $io->_print($string) if $string;
+            }
+        }
+        $io->close;
+    }
+}
+
+# Private methods
+
+# Returns the '_seekable' slot.
+sub _seekable {
+    my ($self) = @_;
+    return $self->{'_seekable'};
+}
+
+# fixes odd bad XML issue espell data (still present 6-24-07)
+
+# Returns the espell XML as a string, with an <ERROR> element that is closed
+# by a second opening <ERROR> tag repaired to use a proper closing tag.
+#
+# Args   : a GLOB filehandle or an HTTP::Response
+# Throws : on any other argument type, or if the content starts with <html>
+sub _fix_espell {
+    my ($self, $response) = @_;
+    my $type = ref($response);
+    my $temp;
+    if ($type eq 'HTTP::Response') {
+        $temp = $response->content;
+    } elsif ($type eq 'GLOB') {
+        foreach my $line (<$response>) {
+            $temp .= $line;
+        }
+    } else {
+        $self->throw("Unrecognized ref type $type");
+    }
+    $self->throw("NCBI espell nonrecoverable error: HTML content returned")
+        if $temp =~ m{^<html>};
+    $temp =~ s{<ERROR>(.*?)<ERROR>}{<ERROR>$1</ERROR>};
+    return $temp;
+}
+
+sub _load_eutil_module { # load Bio::Tools::EUtilities::<$class>; returns the result of _load_module(), or undef if loading raised an exception
+ my ($self, $class) = @_;
+ my $ok;
+ my $module = "Bio::Tools::EUtilities::" . $class;
+
+ eval { # trap load failures so they are reported below instead of propagating
+ $ok = $self->_load_module($module);
+ };
+ if ( $@ ) {
+ print STDERR <<END;
+$self: data module $module cannot be found
+Exception $@
+For more information about the EUtilities system please see the EUtilities docs.
+END
+ ;
+ }
+ return $ok; # still undef when the eval block died
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities - NCBI eutil XML parsers.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ # from file or fh
+ my $parser = Bio::Tools::EUtilities->new(
+ -eutil => 'einfo',
+ -file => 'output.xml'
+ );
+
+ # or HTTP::Response object...
+ my $parser = Bio::Tools::EUtilities->new(
+ -eutil => 'esearch',
+ -response => $response
+ );
+ # esearch, esummary, elink
+ @ids = $parser->get_ids(); # returns array or array ref of IDs
+
+ # egquery, espell
+
+ $term = $parser->get_term(); # returns the search term
+
+ # elink, einfo
+
+ $db = $parser->get_database(); # returns database
+
+ # Query-related methods (esearch, egquery, espell data)
+ # eutil data centered on use of search terms
+
+ my $ct = $parser->get_count; # uses optional database for egquery count
+ my $translation = $parser->get_query_translation;
+
+ my $corrected = $parser->get_corrected_query; # espell
+
+ while (my $gquery = $parser->next_GlobalQuery) {
+ # iterates through egquery data
+ }
+
+ # Info-related methods (einfo data)
+ # database-related information
+
+ my $desc = $parser->get_description;
+ my $update = $parser->get_last_update;
+ my $nm = $parser->get_menu_name;
+ my $ct = $parser->get_record_count;
+
+ while (my $field = $parser->next_FieldInfo) {
+ # ...
+ }
+ while (my $field = $parser->next_LinkInfo) {
+ # ...
+ }
+
+ # History methods (epost data, some data returned from elink)
+ # data which enables one to retrieve and query against user-stored
+ # information on the NCBI server
+
+ while (my $cookie = $parser->next_History) {
+ # ...
+ }
+
+ my @hists = $parser->get_Histories;
+
+ # Bio::Tools::EUtilities::Summary (esummary data)
+ # information on a specific database record
+
+ # retrieve nested docsum data
+ while (my $docsum = $parser->next_DocSum) {
+ print "ID:",$docsum->get_ids,"\n";
+ while (my $item = $docsum->next_Item) {
+ # do stuff here...
+ while (my $listitem = $docsum->next_ListItem) {
+ # do stuff here...
+ while (my $listitem = $docsum->next_Structure) {
+ # do stuff here...
+ }
+ }
+ }
+ }
+
+ # retrieve flattened item list per DocSum
+ while (my $docsum = $parser->next_DocSum) {
+ my @items = $docsum->get_all_DocSum_Items;
+ }
+
+=head1 DESCRIPTION
+
+Parses NCBI eutils XML output for retrieving IDs and other information. Part of
+the BioPerl EUtilities system.
+
+This is a general parser for eutils XML; data from efetch is NOT parsed (this
+requires separate format-dependent parsers). All other XML for eutils is parsed.
+These modules can be used independently of Bio::DB::EUtilities and
+Bio::Tools::EUtilities::EUtilParameters; if used in this way, only data present
+in the XML will be parsed out (other bits are retrieved from a passed-in
+Bio::Tools::EUtilities::EUtilParameters instance used while querying the
+database)
+
+=head1 TODO
+
+This module is largely complete. However there are a few holes which will
+eventually be filled in. TranslationSets from esearch are not currently parsed,
+for instance.
+
+=head2 Constructor methods
+
+=head2 new
+
+ Title : new
+ Usage : my $parser = Bio::Tools::EUtilities->new(-file => 'my.xml',
+ -eutil => 'esearch');
+ Function : create Bio::Tools::EUtilities instance
+ Returns : new Bio::Tools::EUtilities instance
+ Args : -file/-fh - File or filehandle
+ -eutil - eutil parser to use (supports all but efetch)
+ -response - HTTP::Response object (optional)
+
+=head1 Bio::Tools::EUtilities methods
+
+=head2 cache_response
+
+ Title : cache_response
+ Usage : $parser->cache_response(1)
+ Function : sets flag to cache response object (off by default)
+ Returns : value eval'ing to TRUE or FALSE
+ Args : value eval'ing to TRUE or FALSE
+ Note : must be set prior to any parsing run
+
+=head2 response
+
+ Title : response
+ Usage : my $response = $parser->response;
+ Function : Get/Set HTTP::Response object
+ Returns : HTTP::Response
+ Args : HTTP::Response
+ Note : to prevent object from destruction set cache_response() to TRUE
+
+=head2 parameter_base
+
+ Title : parameter_base
+ Usage : my $response = $parser->parameter_base;
+ Function : Get/Set Bio::ParameterBaseI object (should be Bio::Tools::EUtilities::EUtilParameters)
+ Returns : Bio::Tools::EUtilities::EUtilParameters || undef
+ Args : (optional) Bio::Tools::EUtilities::EUtilParameters
+ Note : If this object is present, it may be used as a last resort for
+ some data values if parsed XML does not contain said values (for
+ instance, database, term, IDs, etc).
+
+=head2 data_parsed
+
+ Title : data_parsed
+ Usage : if ($parser->data_parsed) {...}
+ Function : returns TRUE if data has been parsed
+ Returns : value eval'ing to TRUE or FALSE
+ Args : none (set within parser)
+ Note : mainly internal method (set in case user wants to check
+ whether parser is exhausted).
+
+=head2 is_lazy
+
+ Title : is_lazy
+ Usage : if ($parser->is_lazy) {...}
+ Function : returns TRUE if parser is set to lazy parsing mode
+ (only affects elink/esummary)
+ Returns : Boolean
+ Args : none
+ Note : Permanently set in constructor. Still highly experimental.
+ Don't stare directly at happy fun ball...
+
+=head2 parse_data
+
+ Title : parse_data
+ Usage : $parser->parse_data
+ Function : direct call to parse data; normally implicitly called
+ Returns : none
+ Args : none
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) simple data for text formatting
+ Note : Implemented in plugins
+
+=head2 print_all
+
+ Title : print_all
+ Usage : $info->print_all();
+ $info->print_all(-fh => $fh, -cb => $coderef);
+ Function : prints (dumps) all data in parser. Unless a coderef is supplied,
+ this just dumps the parser-specific to_string method to either a
+ file/fh or STDOUT
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method. This is
+ passed in the parser object
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Notes : only applicable for einfo. If -file or -fh are not defined,
+ prints to STDOUT
+
+=head1 Bio::Tools::EUtilities::EUtilDataI methods
+
+=head2 eutil
+
+ Title : eutil
+ Usage : $eutil->$foo->eutil
+ Function : Get/Set eutil
+ Returns : string
+ Args : string (eutil)
+ Throws : on invalid eutil
+
+=head2 datatype
+
+ Title : datatype
+ Usage : $type = $foo->datatype;
+ Function : Get/Set data object type
+ Returns : string
+ Args : string
+
+=head1 Methods useful for multiple eutils
+
+=head2 get_ids
+
+ Title : get_ids
+ Usage : my @ids = $parser->get_ids
+ Function : returns array of requested IDs (see Notes for more specifics)
+ Returns : array
+ Args : [conditional] not required except when running elink queries against
+ multiple databases. In case of the latter, the database name is
+ optional but recommended when retrieving IDs as the ID list will
+ be globbed together. In such cases, if a db name isn't provided a
+ warning is issued as a reminder.
+ Notes : esearch : returned ID list
+ elink : returned ID list (see Args above for caveats)
+ all others : from parameter_base->id or undef
+
+=head2 get_database
+
+ Title : get_database
+ Usage : my $db = $info->get_database;
+ Function : returns single database name (eutil-compatible). This is the
+ queried database. For most eutils this is straightforward. For
+ elinks (which have 'db' and 'dbfrom') this is db/dbto, for egquery,
+ it is the first db in the list (you probably want get_databases
+ instead)
+ Returns : string
+ Args : none
+ Notes : egquery : first db in the query (you probably want get_databases)
+ einfo : the queried database
+ espell : the queried database
+ all others : from parameter_base->db or undef
+
+=head2 get_db (alias for get_database)
+
+=head2 get_databases
+
+ Title : get_databases
+ Usage : my @dbs = $parser->get_databases
+ Function : returns list of databases
+ Returns : array of strings
+ Args : none
+ Notes : This is guaranteed to return a list of databases. For a single
+ database use the convenience method get_db/get_database
+
+ egquery : list of all databases in the query
+ einfo : the queried database, or the available databases
+ espell : the queried database
+ elink : collected from each LinkSet
+ all others : from parameter_base->db or undef
+
+=head2 get_dbs (alias for get_databases)
+
+=head2 next_History
+
+ Title : next_History
+ Usage : while (my $hist=$parser->next_History) {...}
+ Function : returns next HistoryI (if present).
+ Returns : Bio::Tools::EUtilities::HistoryI (History or LinkSet)
+ Args : none
+ Note : esearch, epost, and elink are all capable of returning data
+ indicating that search results (in the form of UIDs) are stored on the
+ remote server. Access to this data is wrapped up in a simple interface
+ (HistoryI), which is implemented in two classes:
+ Bio::Tools::EUtilities::History (the simplest) and
+ Bio::Tools::EUtilities::Link::LinkSet. In general, calls to epost and
+ esearch will only return a single HistoryI object (formerly known as a
+ Cookie), but calls to elink can generate many depending on the
+ number of IDs, the correspondence, etc. Hence this iterator, which
+ allows one to retrieve said data one piece at a time.
+
+=head2 next_cookie (alias for next_History)
+
+=head2 get_Histories
+
+ Title : get_Histories
+ Usage : my @hists = $parser->get_Histories
+ Function : returns list of HistoryI objects.
+ Returns : list of Bio::Tools::EUtilities::HistoryI (History or LinkSet)
+ Args : none
+
+=head1 Query-related methods
+
+=head2 get_count
+
+ Title : get_count
+ Usage : my $ct = $parser->get_count
+ Function : returns the count (hits for a search)
+ Returns : integer
+ Args : [CONDITIONAL] string with database name - used to retrieve
+ count from specific database when using egquery
+ Notes : egquery : count for specified database (specified above)
+ esearch : count for last search
+ all others : undef
+
+=head2 get_term
+
+ Title : get_term
+ Usage : $st = $qd->get_term;
+ Function : retrieve the term for the global search
+ Returns : string
+ Args : none
+ Notes : egquery : search term
+ espell : search term
+ esearch : from parameter_base->term or undef
+ all others : undef
+
+=head2 get_translation_from
+
+ Title : get_translation_from
+ Usage : $string = $qd->get_translation_from();
+ Function: portion of the original query that was replaced by the string from get_translation_to()
+ Returns : string
+ Args : none
+ Note : only applicable for esearch
+
+=head2 get_translation_to
+
+ Title : get_translation_to
+ Usage : $string = $qd->get_translation_to();
+ Function: replacement string used in place of the original query portion returned by get_translation_from()
+ Returns : string
+ Args : none
+ Note : only applicable for esearch
+
+=head2 get_retstart
+
+ Title : get_retstart
+ Usage : $start = $qd->get_retstart();
+ Function : retstart setting for the query (either set or NCBI default)
+ Returns : Integer
+ Args : none
+ Notes : esearch : retstart
+ esummary : retstart
+ all others : from parameter_base->retstart or undef
+
+=head2 get_retmax
+
+ Title : get_retmax
+ Usage : $max = $qd->get_retmax();
+ Function : retmax setting for the query (either set or NCBI default)
+ Returns : Integer
+ Args : none
+ Notes : esearch : retmax
+ esummary : retmax
+ all others : from parameter_base->retmax or undef
+
+=head2 get_query_translation
+
+ Title : get_query_translation
+ Usage : $string = $qd->get_query_translation();
+ Function: returns the translated query used for the search (if any)
+ Returns : string
+ Args : none
+ Notes : only applicable for esearch. This is the actual term used for
+ esearch.
+
+=head2 get_corrected_query
+
+ Title : get_corrected_query
+ Usage : my $cor = $eutil->get_corrected_query;
+ Function : retrieves the corrected query when using espell
+ Returns : string
+ Args : none
+ Notes : only applicable for espell.
+
+=head2 get_replaced_terms
+
+ Title : get_replaced_terms
+ Usage : my @terms = $eutil->get_replaced_terms
+ Function : returns array of strings replaced in the query
+ Returns : array of strings
+ Args : none
+ Notes : only applicable for espell
+
+=head2 next_GlobalQuery
+
+ Title : next_GlobalQuery
+ Usage : while (my $query = $eutil->next_GlobalQuery) {...}
+ Function : iterates through the queries returned from an egquery search
+ Returns : GlobalQuery object
+ Args : none
+ Notes : only applicable for egquery
+
+=head2 get_GlobalQueries
+
+ Title : get_GlobalQueries
+ Usage : @queries = $eutil->get_GlobalQueries
+ Function : returns list of GlobalQuery objects
+ Returns : array of GlobalQuery objects
+ Args : none
+ Notes : only applicable for egquery
+
+=head2 print_GlobalQueries
+
+ Title : print_GlobalQueries
+ Usage : $docsum->print_GlobalQueries();
+ $docsum->print_GlobalQueries(-fh => $fh, -cb => $coderef);
+ Function : prints item data for all global queries. The default printing
+ method is each item per DocSum is printed with relevant values if
+ present in a simple table using Text::Wrap.
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method. This is passed
+ in a GlobalQuery object;
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Notes : only applicable for egquery. If -file or -fh are not defined,
+ prints to STDOUT
+
+=head1 Summary-related methods
+
+=head2 next_DocSum
+
+ Title : next_DocSum
+ Usage : while (my $ds = $esum->next_DocSum) {...}
+ Function : iterate through DocSum instances
+ Returns : single Bio::Tools::EUtilities::Summary::DocSum
+ Args : none yet
+ Notes : only applicable for esummary
+
+=head2 get_DocSums
+
+ Title : get_DocSums
+ Usage : my @docsums = $esum->get_DocSums
+ Function : retrieve a list of DocSum instances
+ Returns : array of Bio::Tools::EUtilities::Summary::DocSum
+ Args : none
+ Notes : only applicable for esummary
+
+=head2 print_DocSums
+
+ Title : print_DocSums
+ Usage : $docsum->print_DocSums();
+ $docsum->print_DocSums(-fh => $fh, -cb => $coderef);
+ Function : prints item data for all docsums. The default data is generated
+ via DocSum::to_string
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method. This is passed
+ in a DocSum object
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Notes : only applicable for esummary. If -file or -fh are not defined,
+ prints to STDOUT
+
+=head1 Info-related methods
+
+=head2 get_available_databases
+
+ Title : get_available_databases
+ Usage : my @dbs = $info->get_available_databases
+ Function : returns list of available eutil-compatible database names
+ Returns : Array of strings
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 get_record_count
+
+ Title : get_record_count
+ Usage : my $ct = $eutil->get_record_count;
+ Function : returns database record count
+ Returns : integer
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 get_last_update
+
+ Title : get_last_update
+ Usage : my $time = $info->get_last_update;
+ Function : returns string containing time/date stamp for last database update
+ Returns : string
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 get_menu_name
+
+ Title : get_menu_name
+ Usage : my $nm = $info->get_menu_name;
+ Function : returns string of database menu name
+ Returns : string
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 get_description
+
+ Title : get_description
+ Usage : my $desc = $info->get_description;
+ Function : returns database description
+ Returns : string
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 next_FieldInfo
+
+ Title : next_FieldInfo
+ Usage : while (my $field = $info->next_FieldInfo) {...}
+ Function : iterate through FieldInfo objects
+ Returns : FieldInfo object
+ Args : none
+ Notes : only applicable for einfo. Uses callback() for filtering if defined
+ for 'fields'
+
+=head2 get_FieldInfo
+
+ Title : get_FieldInfo
+ Usage : my @fields = $info->get_FieldInfo;
+ Function : returns list of FieldInfo objects
+ Returns : array (FieldInfo objects)
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 next_LinkInfo
+
+ Title : next_LinkInfo
+ Usage : while (my $link = $info->next_LinkInfo) {...}
+ Function : iterate through LinkInfo objects
+ Returns : LinkInfo object
+ Args : none
+ Notes : only applicable for einfo. Uses callback() for filtering if defined
+ for 'linkinfo'
+
+=head2 get_LinkInfo
+
+ Title : get_LinkInfo
+ Usage : my @links = $info->get_LinkInfo;
+ Function : returns list of LinkInfo objects
+ Returns : array (LinkInfo objects)
+ Args : none
+ Notes : only applicable for einfo.
+
+=head2 print_FieldInfo
+
+ Title : print_FieldInfo
+ Usage : $info->print_FieldInfo();
+ $info->print_FieldInfo(-fh => $fh, -cb => $coderef);
+ Function : prints field data for each FieldInfo object. The default is generated
+ via FieldInfo::to_string
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method. This is
+ passed in a FieldInfo object
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Notes : only applicable for einfo. If -file or -fh are not defined,
+ prints to STDOUT
+
+=head2 print_LinkInfo
+
+ Title : print_LinkInfo
+ Usage : $info->print_LinkInfo();
+ $info->print_LinkInfo(-fh => $fh, -cb => $coderef);
+ Function : prints link data for each LinkInfo object. The default is generated
+ via LinkInfo::to_string
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method. This is passed
+ in a LinkInfo object
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Notes : only applicable for einfo. If -file or -fh are not defined,
+ prints to STDOUT
+
+=head1 Bio::Tools::EUtilities::Link-related methods
+
+=head2 next_LinkSet
+
+ Title : next_LinkSet
+ Usage : while (my $ls = $eutil->next_LinkSet) {...}
+ Function : iterate through LinkSet objects
+ Returns : LinkSet object
+ Args : none
+ Notes : only applicable for elink. Uses callback() for filtering if defined
+ for 'linksets'
+
+=head2 get_LinkSets
+
+ Title : get_LinkSets
+ Usage : my @links = $info->get_LinkSets;
+ Function : returns list of LinkSet objects
+ Returns : array (LinkSet objects)
+ Args : none
+ Notes : only applicable for elink.
+
+=head2 print_LinkSets
+
+ Title : print_LinkSets
+ Usage : $info->print_LinkSets();
+ $info->print_LinkSets(-fh => $fh, -cb => $coderef);
+ Function : prints link data for each LinkSet object. The default is generated
+ via LinkSet::to_string
+ Returns : none
+ Args : [optional]
+ -file : file to print to
+ -fh : filehandle to print to (cannot be used concurrently with file)
+ -cb : coderef to use in place of default print method. This is passed
+ in a LinkSet object
+ -wrap : number of columns to wrap default text output to (def = 80)
+ Notes : only applicable for elink. If -file or -fh are not defined,
+ prints to STDOUT
+
+=head2 get_linked_databases
+
+ Title : get_linked_databases
+ Usage : my @dbs = $eutil->get_linked_databases
+ Function : returns list of databases linked to in linksets
+ Returns : array of databases
+ Args : none
+ Notes : only applicable for elink. Now defers to get_databases.
+
+=head1 Iterator- and callback-related methods
+
+=head2 rewind
+
+ Title : rewind
+ Usage : $esum->rewind()
+ $esum->rewind('recursive')
+ Function : rewinds (resets) the requested iterator(s)
+ Returns : none
+ Args : [optional] Scalar; string ('all') to reset all iterators, or string
+ describing the specific main object iterator to reset. The following
+ are recognized (case-insensitive):
+
+ 'all' - rewind all objects and also recursively reset nested object iterators
+ (such as LinkSets and DocSums).
+ 'globalqueries' - GlobalQuery objects
+ 'fieldinfo' or 'fieldinfos' - FieldInfo objects
+ 'linkinfo' or 'linkinfos' - LinkInfo objects in this layer
+ 'linksets' - LinkSet objects
+ 'docsums' - DocSum objects
+ 'histories' - HistoryI objects (Cookies, LinkSets)
+
+=head2 generate_iterator
+
+ Title : generate_iterator
+ Usage : my $coderef = $esum->generate_iterator('linkinfo')
+ Function : generates an iterator (code reference) which iterates through
+ the relevant object indicated by the args
+ Returns : code reference
+ Args : [REQUIRED] Scalar; string describing the specific object to iterate.
+ The following are currently recognized (case-insensitive):
+
+ 'globalqueries'
+ 'fieldinfo' or 'fieldinfos' (the latter sounds clumsy, but I alias it JIC)
+ 'linkinfo' or 'linkinfos' (the latter sounds clumsy, but I alias it JIC)
+ 'linksets'
+ 'docsums'
+ 'histories'
+
+ Note : This function generates a simple coderef that one can use
+ independently of the various next_* functions (in fact, the next_*
+ functions use lazily created iterators generated via this method,
+ while rewind() merely deletes them so they can be regenerated on the
+ next call).
+
+ A callback specified using callback() will be used to filter objects
+ for any generated iterator. This behaviour is implemented for both
+ normal and lazy iterator types and is the default. If you don't want
+ this, make sure to reset any previously set callbacks via
+ reset_callback() (which just deletes the code ref). Note that setting
+ callback() also changes the behavior of the next_* functions as the
+ iterators are generated here (as described above); this is a feature
+ and not a bug.
+
+ 'Lazy' iterators are considered an experimental feature and may be
+ modified in the future. A 'lazy' iterator, which loops through and
+ returns objects as they are created (instead of creating all data
+ instances up front, then iterating through) is returned if the
+ parser is set to 'lazy' mode. This mode is only present for elink
+ and esummary output as they are the two formats parsed which can
+ generate potentially thousands of individual objects (note efetch
+ isn't parsed, so isn't counted). Use of rewind() with these
+ iterators is not supported for the time being as we can't guarantee
+ you can rewind(), as this depends on whether the data source is
+ seek()able and thus 'rewindable'. We will add rewind() support at a
+ later time which will work for 'seekable' data or possibly cached
+ objects via Storable or BDB.
+
+=head2 callback
+
+ Title : callback
+ Usage : $parser->callback(sub {$_[0]->get_database eq 'protein'});
+ Function : Get/set callback code ref used to filter returned data objects
+ Returns : code ref if previously set
+ Args : single argument:
+ code ref - evaluates a passed object and returns true or false value
+ (used in iterators)
+ 'reset' - string, resets the iterator.
+ returns upon any other args
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/EUtilDataI.pm b/lib/Bio/Tools/EUtilities/EUtilDataI.pm
new file mode 100644
index 0000000..84903fa
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/EUtilDataI.pm
@@ -0,0 +1,216 @@
+package Bio::Tools::EUtilities::EUtilDataI;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::EUtilDataI::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Root::RootI);
+use Text::Wrap qw(wrap);
+
+# ABSTRACT: EUtil data object interface.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+{
+    # eutils whose output can be represented by data objects
+    my %VALID_EUTILS;
+    @VALID_EUTILS{qw(esearch epost espell egquery elink einfo esummary)} = (1) x 7;
+
+    # Get/set the name of the eutil this data object belongs to.
+    # A false argument (undef, '', 0) makes the call a plain getter.
+    # Throws (via $self->throw) when the name is not a supported eutil.
+    sub eutil {
+        my ($self, $eutil) = @_;
+        return $self->{'_eutil'} unless $eutil;
+        exists $VALID_EUTILS{$eutil}
+            or $self->throw("$eutil not supported");
+        return $self->{'_eutil'} = $eutil;
+    }
+}
+
+
+# Get/set the data type label held by this object.
+# Any argument, even undef, is treated as a set request.
+sub datatype {
+    my ($self, @value) = @_;
+    $self->{'_type'} = $value[0] if @value;
+    return $self->{'_type'};
+}
+
+
+# Default rewind: this class keeps no iterators, so it only emits a warning
+# through the object's warn() method and returns whatever that call returns.
+sub rewind {
+    my ($self) = @_;
+    return $self->warn("Object may not need an iterator. Please check the documentation.");
+}
+
+
+# Interface stub: implementing classes must override this.  Delegates to the
+# object's throw_not_implemented() method.
+sub _add_data {
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+
+# Interface stub: implementing classes must override this.  Delegates to the
+# object's throw_not_implemented() method.
+sub to_string {
+    my ($self) = @_;
+    return $self->throw_not_implemented;
+}
+
+
+# Thin method wrapper around Text::Wrap::wrap: the invocant is discarded and
+# every remaining argument is handed to wrap() unchanged.
+sub _text_wrap {
+    my ($self, @wrap_args) = @_;
+    return wrap(@wrap_args);
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::EUtilDataI - EUtil data object interface.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ # say you had some data in a hash ref ($data) and wanted to create hierarchies
+ # of object using the same interface, starting with the topmost...
+
+ # $object is a Bio::Tools::EUtilities::EUtilDataI instance
+
+ $object->_add_data($data);
+
+ # in _add_data()... sort through keys and create subobjects as needed
+
+ if ($key eq 'foo') {
+ my $sub = Bio::Tools::EUtilities::FooData->new(-eutil => 'efoo',
+ -type => 'foo');
+ $sub->_add_data($subdata);
+ # store into parent object as needed...
+ ...
+ }
+
+ # access stored data
+
+ while (my $sub = $parent->next_Foo) {...}
+
+=head1 DESCRIPTION
+
+This is a simple interface which allows creation of simple typed object
+hierarchies. Single layers can be accessed via simple iterators (next_* methods)
+or retrieved all at once (get_*) methods; nested data can be iterated through
+nested iterators for each object, or retrieved using get_all_* methods.
+
+This interface defines common methods required for all eutil data-holding
+objects: _add_data(), eutil(), and datatype(). It also specifies that inheriting
+interface classes use at least one of three methods: get_ids(), get_term(), or
+get_database(), which are the three types of data that eutils mainly center on.
+
+Generally, eutil() is the Bio::Tools::EUtilities parser used to set the data.
+Similarly, datatype() is the specific data type for the class.
+
+Implementations which rely on subclasses to store data and have iterators should
+also define a generalized rewind() method that (by default) rewinds all
+iterators to the start. Args passed can specify exactly which iterator to rewind
+and (if possible) recursively rewind nested object iterators.
+
+As the method implies, _add_data() is a private method that adds data chunks to
+the object and sets internal parameters for the various data objects. Methods
+corresponding to the data type simply return the set data or iterate through the
+data sets if the values are more complex. Data can alternatively be passed
+through the object constructor.
+
+=head2 eutil
+
+ Title : eutil
+ Usage : $eutil = $foo->eutil
+ Function : Get/Set eutil
+ Returns : string
+ Args : string (eutil)
+ Throws : on invalid eutil
+
+=head2 datatype
+
+ Title : datatype
+ Usage : $type = $qd->datatype;
+ Function: get/set the simple data type the object holds (linkset, docsum, item, etc)
+ Returns : string (data type name)
+ Args : [optional] string (data type to set)
+ Note : this is probably more useful for devs than for users as a way to keep
+ track of the various types of modules used
+
+=head2 rewind
+
+ Title : rewind
+ Usage : $esum->rewind
+ Function : rewinds the requested iterator
+ Returns : none
+ Args : [OPTIONAL] may include 'all', 'recursive', etc.
+
+=head2 _add_data
+
+ Title : _add_data
+ Usage : $foo->_add_data($data)
+ Function : adds data to current object as a chunk
+ Returns : none
+ Args : hash ref containing relevant data
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) simple data for text formatting
+ Note : Used generally for debugging and for the print_* methods
+
+=head2 _text_wrap
+
+ Title : _text_wrap
+ Usage : $foo->_text_wrap($initial_tab, $subsequent_tab, @text)
+ Function : private internal wrapper for Text::Wrap::wrap
+ Returns : string
+ Args : same as Text::Wrap::wrap (initial indent, subsequent indent, text)
+ Note : Internal use only. Simple wrapper method.
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/EUtilParameters.pm b/lib/Bio/Tools/EUtilities/EUtilParameters.pm
new file mode 100644
index 0000000..0c35667
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/EUtilParameters.pm
@@ -0,0 +1,700 @@
+package Bio::Tools::EUtilities::EUtilParameters;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::EUtilParameters::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Root::Root Bio::ParameterBaseI);
+use URI;
+use HTTP::Request;
+use Bio::Root::IO;
+
+# ABSTRACT: Manipulation of NCBI eutil-based parameters for remote database requests.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+# Per-eutil request configuration, keyed by eutil name. All eutils share a
+# single host base URL (see url_base_address()).
+#
+# mode : HTTP methods accepted for the eutil (GET and/or POST, as used for
+# the HTTP::Request); request_mode() treats the first entry as the default
+# location : CGI script name; to_request() appends it to the host base URL
+# params : parameters allowed for that eutil (see available_parameters())
+my %MODE = (
+ 'einfo' => {
+ 'mode' => ['GET'],
+ 'location' => 'einfo.fcgi',
+ 'params' => [qw(db tool email)],
+ },
+ 'epost' => {
+ 'mode' => ['POST','GET'],
+ 'location' => 'epost.fcgi',
+ 'params' => [qw(db retmode id tool email idtype WebEnv query_key)],
+ },
+ 'efetch' => {
+ 'mode' => ['GET','POST'],
+ 'location' => 'efetch.fcgi',
+ 'params' => [qw(db retmode id retmax retstart rettype strand seq_start
+ seq_stop complexity report tool email idtype WebEnv query_key)],
+ },
+ 'esearch' => {
+ 'mode' => ['GET','POST'],
+ 'location' => 'esearch.fcgi',
+ 'params' => [qw(db retmode usehistory term field reldate mindate
+ maxdate datetype retmax retstart rettype sort tool email idtype
+ WebEnv query_key)],
+ },
+ 'esummary' => {
+ 'mode' => ['GET','POST'],
+ 'location' => 'esummary.fcgi',
+ 'params' => [qw(db retmode id retmax retstart rettype tool email idtype
+ version WebEnv query_key)],
+ },
+ 'elink' => {
+ 'mode' => ['GET','POST'],
+ 'location' => 'elink.fcgi',
+ 'params' => [qw(db retmode id reldate mindate maxdate datetype term
+ dbfrom holding cmd version tool email idtype linkname WebEnv
+ query_key)],
+ },
+ 'egquery' => {
+ 'mode' => ['GET','POST'],
+ 'location' => 'egquery.fcgi',
+ 'params' => [qw(term retmode tool email)],
+ },
+ 'espell' => {
+ 'mode' => ['GET','POST'],
+ 'location' => 'espell.fcgi',
+ 'params' => [qw(db retmode term tool email )],
+ }
+);
+
+# Names of every eutil parameter that gets a generated accessor below.
+# Declared here and filled in inside BEGIN.
+my @PARAMS;
+
+# generate getter/setters (will move this into individual ones at some point)
+#
+# Each generated accessor stores its value under the key "_<name>". A defined
+# argument that differs (string comparison) from the stored value, or arrives
+# when nothing is stored yet, is saved and flips _statechange to 1. An
+# undefined argument makes the call a plain getter. The accessor always
+# returns the stored value.
+
+BEGIN {
+ @PARAMS = qw(db id email retmode rettype usehistory term field tool
+ reldate mindate maxdate datetype retstart retmax sort seq_start seq_stop
+ strand complexity report dbfrom cmd holding version linkname WebEnv
+ query_key idtype);
+ for my $method (@PARAMS) {
+ # NOTE(review): the result of this string eval and $@ are not checked,
+ # so a failure to compile an accessor would go unnoticed here.
+ eval <<END;
+sub $method {
+ my (\$self, \$val) = \@_;
+ if (defined \$val) {
+ if ((!defined \$self->{'_$method'}) ||
+ (defined \$self->{'_$method'} && \$self->{'_$method'} ne \$val)) {
+ \$self->{'_statechange'} = 1;
+ \$self->{'_$method'} = \$val;
+ }
+ }
+ return \$self->{'_$method'};
+}
+END
+ }
+}
+
+# Constructor. Named arguments may be any parameter in @PARAMS plus eutil,
+# history, correspondence, id_file and request_mode.
+# Defaults applied here: eutil 'efetch', tool 'BioPerl', and, when no retmode
+# argument is passed, whatever set_default_retmode() selects.
+# Returns the new object, flagged as changed (_statechange = 1).
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    # remember whether the caller asked for a retmode explicitly
+    my ($retmode) = $self->_rearrange(["RETMODE"], @args);
+    # order is important here, eutil must be set first so that proper error
+    # checking occurs for the later attributes
+    $self->_set_from_args(\@args,
+        -methods => [@PARAMS, qw(eutil history correspondence id_file request_mode)]);
+    $self->eutil() || $self->eutil('efetch');
+    $self->tool() || $self->tool('BioPerl');
+    # set default retmode if not explicitly set
+    $self->set_default_retmode if (!$retmode);
+    $self->{'_statechange'} = 1;
+    return $self;
+}
+
+
+# Set additional parameters while leaving the ones already set untouched.
+# Accepts the same named arguments as the accessors in @PARAMS plus eutil,
+# correspondence and history; an ID_FILE argument is read through id_file().
+# When no RETMODE argument is passed, the retmode is re-derived through
+# set_default_retmode(). Returns nothing.
+sub set_parameters {
+    my ($self, @args) = @_;
+    # allow automated resetting; must check to ensure that retmode isn't explicitly passed
+    my ($newmode, $file) = $self->_rearrange([qw(RETMODE ID_FILE)], @args);
+    $self->_set_from_args(\@args, -methods => [@PARAMS, qw(eutil correspondence history)]);
+    # set default retmode if not explicitly passed
+    $self->set_default_retmode unless $newmode;
+    $file && $self->id_file($file);
+    return;
+}
+
+
+# Clear every stored parameter (plus eutil, correspondence and the history and
+# request caches), then apply the passed arguments from scratch.
+# Defaults re-applied afterwards: eutil 'efetch' and, when no RETMODE argument
+# is passed, the retmode chosen by set_default_retmode(). An ID_FILE argument
+# is read through id_file(). The object is flagged as changed.
+sub reset_parameters {
+    my ($self, @args) = @_;
+    # is there a better way of doing this? probably, but this works...
+    my ($retmode, $file) = $self->_rearrange([qw(RETMODE ID_FILE)], @args);
+    # only touch slots that hold a value so no new keys are created
+    for my $slot (@PARAMS, qw(eutil correspondence history_cache request_cache)) {
+        undef $self->{"_$slot"} if defined $self->{"_$slot"};
+    }
+    $self->_set_from_args(\@args, -methods => [@PARAMS, qw(eutil correspondence history)]);
+    $self->eutil() || $self->eutil('efetch');
+    $self->set_default_retmode unless $retmode;
+    $file && $self->id_file($file);
+    $self->{'_statechange'} = 1;
+}
+
+
+# Get/set the list of parameters flagged to be carried over.
+#
+# Args    : $params - [optional] array ref of parameter names; an empty array
+#                     ref clears the whole carryover list
+#           $mode   - [optional] 'add' (default) or 'delete'
+# Returns : sorted list of the parameter names currently flagged (the number
+#           of names in scalar context)
+# Throws  : if $params is not an array ref, or $mode is not 'add' or 'delete'
+# Note    : names that are not recognized eutil parameters are skipped with a
+#           warning
+sub carryover {
+    my ($self, $params, $mode) = @_;
+    my %allowed = map {$_ => 1} (@PARAMS, qw(eutil history correspondence));
+    if ($params) {
+        $self->throw("Must pass in an array ref of parameters") unless
+            ref($params) eq 'ARRAY';
+        # default the caller's mode; re-declaring it with 'my' here would
+        # hide the argument and silently force 'add'
+        $mode ||= 'add';
+        $self->throw("Mode must be 'add' or 'delete'") unless $mode eq 'add' || $mode eq 'delete';
+        if (!scalar(@$params)) { # empty array ref
+            $self->{_carryover} = {};
+        } else {
+            for my $p (@$params) {
+                if (!exists $allowed{$p}) {
+                    $self->warn("$p is not a recognized eutil parameter");
+                    next;
+                }
+                if ($mode eq 'add') {
+                    $self->{_carryover}->{$p} = 1;
+                } else {
+                    delete $self->{_carryover}->{$p};
+                }
+            }
+        }
+    }
+    # build the list first: 'keys %h || ()' would evaluate keys in scalar
+    # context and yield the key count instead of the names
+    my @carried = sort keys %{ $self->{_carryover} || {} };
+    return @carried;
+}
+
+# Stub: currently does nothing beyond unpacking $self. The commented-out loop
+# below would undef the stored parameters and caches; presumably it is meant
+# to skip the names registered via carryover() once implemented (TODO confirm).
+sub _reset_except_carryover {
+ my $self = shift;
+ #for my $p (@PARAMS, qw(eutil correspondence history_cache request_cache)) {
+ # undef $self->{"_$p"} if defined $self->{"_$p"};
+ #}
+}
+
+
+# Get/set the HTTP request mode (GET or POST) used for the current eutil.
+#
+# Args    : [optional] mode string; upper-cased before validation
+# Returns : the explicitly set mode if there is one; otherwise 'POST' when the
+#           eutil allows more than one mode and either more than 200 IDs are
+#           set or the search term is longer than 300 characters; otherwise
+#           the first (default) mode listed for the eutil in %MODE
+# Throws  : if the requested mode is not listed for the current eutil
+sub request_mode {
+    my ($self, $mode) = @_;
+    my $eutil = $self->eutil;
+    if (defined $mode) {
+        $mode = uc $mode;
+    }
+    if ($mode) {
+        my $supported = grep { $_ eq $mode } @{ $MODE{$eutil}{mode} };
+        $supported
+            or $self->throw("Mode $mode not supported for $eutil");
+        $self->{_request_mode} = $mode;
+    }
+    return $self->{_request_mode} if $self->{_request_mode};
+
+    # nothing set explicitly: pick a mode based on the size of the request
+    my @choices = @{ $MODE{$eutil}{mode} };
+    if (@choices > 1) { # both GET and POST are available
+        my $id   = $self->id   || [];
+        my $term = $self->term || '';
+        my $many_ids  = ref $id eq 'ARRAY' && scalar(@$id) > 200;
+        my $long_term = CORE::length($term) > 300;
+        return 'POST' if $many_ids || $long_term;
+    }
+    # otherwise fall back to the eutil's default, which is listed first
+    return $choices[0];
+}
+
+
+# Report the state-change flag: true when parameters have been modified since
+# the request was last generated.
+sub parameters_changed {
+    my $self = shift;
+    return $self->{'_statechange'};
+}
+
+
+# List the parameter names available for an eutil.
+#
+# Args    : [optional] eutil name, or 'all'; a false value means 'all'
+# Returns : every known parameter for 'all', otherwise the parameters listed
+#           for that eutil in %MODE
+# Throws  : if the eutil name is not a key of %MODE
+sub available_parameters {
+    my ($self, $type) = @_;
+    return @PARAMS if !$type || $type eq 'all';
+    exists $MODE{$type}
+        or $self->throw("$type parameters not supported");
+    return @{ $MODE{$type}->{params} };
+}
+
+
+# Return the currently set parameters as a flat list of (name => value) pairs.
+#
+# Named args:
+# -type : eutil name used to select the parameter names through
+# available_parameters(); a false value selects all of them
+# -list : array ref of parameter names to report instead; names the
+# object has no method for are dropped
+# -join_ids : when true, array refs stored in id and db are joined with commas
+# Throws if -list is given but is not an array ref.
+sub get_parameters {
+ my ($self, @args) = @_;
+ my ($type, $list, $join) = $self->_rearrange([qw(TYPE LIST JOIN_IDS)], @args);
+ $self->throw("Parameter list not an array ref") if $list && ref $list ne 'ARRAY';
+ $type ||= '';
+ # an explicit -list takes precedence over -type
+ my @final = $list ? grep {$self->can($_)} @{$list} : $self->available_parameters($type);
+ my @p;
+ for my $param (@final) {
+ if ($param eq 'id' && $self->id && $join) {
+ my $id = $self->id;
+ # elink with correspondence set: emit one 'id' pair per group
+ # (array-ref groups are comma-joined) rather than a single joined list
+ if ($self->correspondence && $self->eutil eq 'elink') {
+ for my $id_group (@{ $id }) {
+ if (ref($id_group) eq 'ARRAY') {
+ push @p, ('id' => join(q(,), @{ $id_group }));
+ }
+ elsif (!ref($id_group)) {
+ push @p, ('id' => $id_group);
+ }
+ else {
+ $self->throw("Unknown ID type: $id_group");
+ }
+ }
+ } else {
+ # add a check for undef
+ push @p, ref $id eq 'ARRAY' ?
+ ($param => join(',', grep {defined($_)} @{ $id })):
+ ($param => $id);
+ }
+ }
+ elsif ($param eq 'db' && $self->db && $join) {
+ my $db = $self->db;
+ push @p, (ref $db eq 'ARRAY') ?
+ ($param => join(',', @{ $db })) :
+ ($param => $db) ;
+ }
+ else {
+ # everything else (including id/db when -join_ids is false) is
+ # reported as stored; undefined values are skipped
+ push @p, ($param => $self->{"_$param"}) if defined $self->{"_$param"};
+ }
+ }
+ return @p;
+}
+
+
+# Return the request URI as a string. The string is cached and rebuilt
+# through to_request() only when the parameters have changed or nothing has
+# been cached yet. Rebuilding clears the state-change flag. Any arguments are
+# passed on to to_request().
+sub to_string {
+    my ($self, @args) = @_;
+    my $stale = $self->parameters_changed || !defined $self->{'_string_cache'};
+    if ($stale) {
+        $self->{'_string_cache'} = $self->to_request(@args)->uri->as_string;
+        $self->{'_statechange'}  = 0;
+    }
+    return $self->{'_string_cache'};
+}
+
+
+# Build (or return the cached) HTTP::Request for the current parameters.
+#
+# Args    : [optional] eutil name used to select which parameters are sent;
+#           defaults to the current eutil
+# Returns : HTTP::Request; for GET the parameters are in the URI query, for
+#           POST they are form-encoded in the request body
+# Throws  : if no eutil is set or the request mode is neither GET nor POST
+# Note    : a fresh request is only built when the parameters have changed or
+#           nothing is cached; building clears the state-change flag
+sub to_request {
+    my ($self, $type) = @_;
+    return $self->{'_request_cache'}
+        unless $self->parameters_changed || !defined $self->{'_request_cache'};
+
+    my $eutil = $self->eutil;
+    $self->throw("No eutil set") unless $eutil;
+    $type ||= $eutil;
+    my $mode = $self->request_mode;
+    my $uri  = URI->new($self->url_base_address . $MODE{$eutil}->{location});
+    $mode eq 'GET' || $mode eq 'POST'
+        or $self->throw("Unrecognized request mode: $mode");
+
+    my @form = $self->get_parameters(-type => $type, -join_ids => 1);
+    my $request;
+    if ($mode eq 'GET') {
+        $uri->query_form(@form);
+        $request = HTTP::Request->new($mode => $uri);
+    }
+    else {
+        # POST: take the bare URL first, the parameters travel in the body
+        $request = HTTP::Request->new($mode => $uri->as_string);
+        $uri->query_form(@form);
+        $request->content_type('application/x-www-form-urlencoded');
+        $request->content($uri->query);
+    }
+    $self->{'_statechange'}   = 0;
+    $self->{'_request_cache'} = $request;
+    return $request;
+}
+
+
+# Get/set the eutil these parameters are for.
+# A false argument makes the call a plain getter. Setting a different eutil
+# (or the first one) flags the object as changed.
+# Throws if the name is not a key of %MODE.
+sub eutil {
+    my ($self, $eutil) = @_;
+    return $self->{'_eutil'} unless $eutil;
+    $self->throw("$eutil not supported") unless exists $MODE{$eutil};
+    my $current = $self->{'_eutil'};
+    if (!defined $current || ($current && $current ne $eutil)) {
+        $self->{'_eutil'}       = $eutil;
+        $self->{'_statechange'} = 1;
+    }
+    return $self->{'_eutil'};
+}
+
+
+# Get/set the history object whose WebEnv and query key feed these parameters.
+#
+# Args    : [optional] Bio::Tools::EUtilities::HistoryI object
+# Returns : the cached history object (undef if none was ever set)
+# Throws  : if the argument is not a Bio::Tools::EUtilities::HistoryI
+# Note    : setting copies the pair returned by $history->history into WebEnv
+#           and query_key and flags the object as changed
+sub history {
+    my ($self, $history) = @_;
+    return $self->{'_history_cache'} unless $history;
+    $history->isa('Bio::Tools::EUtilities::HistoryI')
+        or $self->throw('Not a Bio::Tools::EUtilities::HistoryI object!');
+    my ($webenv, $qkey) = $history->history;
+    $self->WebEnv($webenv);
+    $self->query_key($qkey);
+    @{$self}{qw(_statechange _history_cache)} = (1, $history);
+    return $self->{'_history_cache'};
+}
+
+
+# Get/set the correspondence flag. Any defined argument is stored and flags
+# the object as changed; an undefined argument makes the call a getter.
+sub correspondence {
+    my ($self, $corr) = @_;
+    return $self->{'_correspondence'} unless defined $corr;
+    $self->{'_statechange'} = 1;
+    return $self->{'_correspondence'} = $corr;
+}
+
+
+# Convenience setter: read IDs, one per line, from $file and store the
+# list through id().
+#
+# Args    : $file - passed as -input to the _io() object's _initialize_io();
+#           nothing happens when it is false
+# Note    : lines are chomped and stored as-is, including empty lines
+sub id_file {
+    my ($self, $file) = @_;
+    if ($file) {
+        # do this in a way that allows file, fh, IO::Handle
+        my $io = $self->_io;
+        $io->_initialize_io(-input => $file);
+        my @ids;
+        # Test definedness rather than truth: a final line holding just "0"
+        # (no trailing newline) is false in Perl and would otherwise end
+        # the loop early and silently drop that ID.
+        while (defined(my $line = $io->_readline)) {
+            chomp $line;
+            push @ids, $line;
+        }
+        $self->_io->close;
+        $self->id(\@ids);
+    }
+}
+
+
+{
+    # Base URL to which to_request() appends the eutil-specific location.
+    my $EUTILS_ROOT = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
+
+    # Returns the fixed base URL; any arguments are ignored.
+    sub url_base_address {
+        return $EUTILS_ROOT;
+    }
+}
+
+
+{
+    # default retmode if one is not supplied
+    my %NCBI_DATABASE = (
+        'protein'    => 'text',
+        'nucleotide' => 'text',
+        'nuccore'    => 'text',
+        'nucgss'     => 'text',
+        'nucest'     => 'text',
+        'structure'  => 'text',
+        'genome'     => 'text',
+        'gene'       => 'asn1',
+        'journals'   => 'text',
+    );
+
+    # Set retmode() to its default: 'xml' for every eutil except efetch;
+    # for efetch the value listed in %NCBI_DATABASE for the current db,
+    # or 'xml' when the db is not listed. Does nothing for efetch when
+    # db() is false.
+    sub set_default_retmode {
+        my $self = shift;
+
+        return $self->retmode('xml') if $self->eutil ne 'efetch';
+
+        # assume retmode will be set along with db
+        my $db = $self->db;
+        return unless $db;
+
+        my $mode = 'xml';
+        $mode = $NCBI_DATABASE{$db} if exists $NCBI_DATABASE{$db};
+        return $self->retmode($mode);
+    }
+}
+
+# Lazily create and return the Bio::Root::IO helper kept in the '_io' slot.
+sub _io {
+    my $self = shift;
+    $self->{'_io'} = Bio::Root::IO->new() unless defined $self->{'_io'};
+    return $self->{'_io'};
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::EUtilParameters - Manipulation of NCBI eutil-based parameters for remote database requests.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ # Bio::Tools::EUtilities::EUtilParameters implements Bio::ParameterBaseI
+
+ my @params = (-eutil => 'efetch',
+ db => 'nucleotide',
+ id => \@ids,
+ email => 'me@foo.bar',
+ retmode => 'xml');
+
+ my $p = Bio::Tools::EUtilities::EUtilParameters->new(@params);
+
+ if ($p->parameters_changed) {
+ # ...
+ } # state information
+
+ $p->set_parameters(@extra_params); # set new NCBI parameters, leaves others preset
+
+ $p->reset_parameters(@new_params); # reset NCBI parameters to original state
+
+ $p->to_string(); # get a URI-encoded string representation of the URL address
+
+ $p->to_request(); # get an HTTP::Request object (to pass on to LWP::UserAgent)
+
+=head1 DESCRIPTION
+
+Bio::Tools::EUtilities::EUtilParameters is-a Bio::ParameterBaseI implementation
+that allows simple manipulation of NCBI eutil parameters for CGI-based queries.
+SOAP-based methods may be added in the future.
+
+For simplicity parameters do not require dashes when passed and do not need URI
+encoding (spaces are converted to '+', symbols encoded, etc). Also, the
+following extra parameters can be passed to the new() constructor or via
+set_parameters() or reset_parameters():
+
+ eutil - the eutil to be used. The default is 'efetch' if not set.
+ correspondence - Flag for how IDs are treated. Default is undef (none).
+ history - a Bio::Tools::EUtilities::HistoryI object. Default is undef (none).
+
+At this point minimal checking is done for potential errors in parameter
+passing, though these should be easily added in the future when necessary.
+
+=head1 Bio::ParameterBaseI implemented methods
+
+=head2 set_parameters
+
+ Title : set_parameters
+ Usage : $pobj->set_parameters(@params);
+ Function: sets the NCBI parameters listed in the hash or array
+ Returns : None
+ Args : [optional] hash or array of parameter/values.
+ Note : This sets any parameter passed but leaves previously set data alone.
+ In addition to regular eutil-specific parameters, you can set the
+ following:
+
+ -eutil - the eUtil to be used (default 'efetch')
+ -history - pass a HistoryI-implementing object, which
+ sets the WebEnv, query_key, and possibly db and linkname
+ (the latter two only for LinkSets)
+ -correspondence - Boolean flag, set to TRUE or FALSE; indicates how
+ IDs are to be added together for elink request where
+ ID correspondence might be needed
+ (default 0)
+
+=head2 reset_parameters
+
+ Title : reset_parameters
+ Usage : resets values
+ Function: resets parameters to either undef or value in passed hash
+ Returns : none
+ Args : [optional] hash of parameter-value pairs
+ Note : This sets any parameter passed, but resets all others (deletes them).
+ In addition to regular eutil-specific parameters, you can set the
+ following:
+
+ -eutil - the eUtil to be used (default 'efetch')
+ -history - pass a HistoryI-implementing object, which
+ sets the WebEnv, query_key, and possibly db and linkname
+ (the latter two only for LinkSets)
+ -correspondence - Boolean flag, set to TRUE or FALSE; indicates how
+ IDs are to be added together for elink request where
+ ID correspondence might be needed
+ (default 0)
+
+=head2 carryover
+
+ Title : carryover
+ Usage : $obj->carryover(qw(email tool db))
+ Function : Carries over the designated parameters when using reset_parameters()
+ Returns : a list of carried-over parameters
+ Args : An array reference of parameters to carry over, followed optionally
+ by the mode ('add' or 'delete', indicating whether to append to or
+ remove the specified values passed in). To clear all values, pass in
+ an empty array reference (the mode in this case doesn't matter).
+
+ In addition to the normal eUtil-specific parameters, the following
+ additional parameters are allowed:
+
+ -eutil - the eUtil to be used (default 'efetch')
+ -history - pass a HistoryI-implementing object, which
+ sets the WebEnv, query_key, and possibly db and linkname
+ (the latter two only for LinkSets)
+ -correspondence - Boolean flag, set to TRUE or FALSE; indicates how
+ IDs are to be added together for elink request where
+ ID correspondence might be needed
+ (default 0)
+ Default : None (no carried over parameters)
+ Status : NYI (dev in progress, carry on, nothing to see here)
+
+=head2 request_mode
+
+ Title : request_mode
+ Usage : $obj->request_mode
+ Function : get/set the mode for the user agent to use for generating a request
+ Returns : either a preset mode (checked against the eutil) or a best-possible
+ option based upon the currently-set parameters
+ Args :
+ Status :
+
+=head2 parameters_changed
+
+ Title : parameters_changed
+ Usage : if ($pobj->parameters_changed) {...}
+ Function: Returns TRUE if parameters have changed
+ Returns : Boolean (0 or 1)
+ Args : [optional] Boolean
+
+=head2 available_parameters
+
+ Title : available_parameters
+ Usage : @params = $pobj->available_parameters()
+ Function: Returns a list of the available parameters
+ Returns : Array of available parameters (no values)
+ Args : [optional] A string with the eutil name (for returning eutil-specific
+ parameters)
+
+=head2 get_parameters
+
+ Title : get_parameters
+ Usage : @params = $pobj->get_parameters;
+ %params = $pobj->get_parameters;
+ Function: Returns list of key/value pairs, parameter => value
+ Returns : Flattened list of key-value pairs. All key-value pairs returned,
+ though subsets can be returned based on the '-type' parameter. Data
+ originally set as an array ref are returned based on whether the
+ '-join_ids' flag is set (default is the same array ref).
+ Args : -type : the eutil name (Default: returns all). Use of '-list'
+ supersedes this
+ -list : array ref of specific parameters
+ -join_ids : Boolean; join IDs based on correspondence (Default: no join)
+
+=head1 Implementation-specific to_* methods
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $string = $pobj->to_string;
+ Function: Returns string (URL only in this case)
+ Returns : String (URL only for now)
+ Args : [optional] 'all'; build URI::https using all parameters
+ Default : Builds based on allowed parameters (presence of history data
+ or eutil type in %MODE).
+ Note : Changes state of object. Absolute string
+
+=head2 to_request
+
+ Title : to_request
+ Usage : $uri = $pobj->to_request;
+ Function: Returns HTTP::Request object
+ Returns : HTTP::Request
+ Args : [optional] 'all'; builds request using all parameters
+ Default : Builds based on allowed parameters (presence of history data
+ or eutil type in %MODE).
+ Note : Changes state of object (to boolean FALSE). Used for CGI-based GET/POST
+ TODO : esearch, esummary, elink now accept POST for batch submission
+ (something NCBI apparently allowed but didn't advertise). Should we
+ switch most of these to utilize POST instead, or make it dep on the
+ number of submitted IDs?
+
+=head1 Implementation specific-methods
+
+=head2 eutil
+
+ Title : eutil
+ Usage : $p->eutil('efetch')
+ Function: gets/sets the eutil for this set of parameters
+ Returns : string (eutil)
+ Args : [optional] string (eutil)
+ Throws : '$eutil not supported' if eutil not present
+ Note : This does not reset retmode to the default if called directly.
+
+=head2 history
+
+ Title : history
+ Usage : $p->history($history);
+ Function: gets/sets the history object to be used for these parameters
+ Returns : Bio::Tools::EUtilities::HistoryI (if set)
+ Args : [optional] Bio::Tools::EUtilities::HistoryI
+ Throws : Passed something other than a Bio::Tools::EUtilities::HistoryI
+ Note : This overrides WebEnv() and query_key() settings when set. This
+ caches the last history object passed and returns like a Get/Set
+
+=head2 correspondence
+
+ Title : correspondence
+ Usage : $p->correspondence(1);
+ Function: Sets flag for posting IDs for one-to-one correspondence
+ Returns : Boolean
+ Args : [optional] boolean value
+
+=head2 id_file
+
+ Title : id_file
+ Usage : $p->id_file('<foo');
+ Function: convenience method; passes in file containing a list of IDs for
+ searches (one per line), sets id() to list
+ Returns : none
+ Args : either string indicating file to use, a file handle, or an IO::Handle
+ object
+ Note : use of this overrides concurrent use of the '-id' parameter when both
+ are passed. The filename is not retained, merely parsed for IDs.
+
+=head2 url_base_address
+
+ Title : url_base_address
+ Usage : $address = $p->url_base_address();
+ Function: Get URL base address
+ Returns : String
+ Args : None in this implementation; the URL is fixed
+
+=head2 set_default_retmode
+
+ Title : set_default_retmode
+ Usage : $p->set_default_retmode();
+ Function: sets retmode to default value specified by the eutil() and the value
+ in %NCBI_DATABASE (for efetch only) if called
+ Returns : none
+ Args : none
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/%%7Bdist%7D
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/History.pm b/lib/Bio/Tools/EUtilities/History.pm
new file mode 100644
index 0000000..b3b65d1
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/History.pm
@@ -0,0 +1,171 @@
+package Bio::Tools::EUtilities::History;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::History::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Root::Root Bio::Tools::EUtilities::HistoryI);
+
+# ABSTRACT: Lightweight implementation of HistoryI interface (not bound to filehandles, extraneous methods, etc).
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+# Constructor.
+#
+# Args    : named arguments; -eutil is required, and the full list is also
+#           forwarded to the parent constructor
+# Returns : new instance with eutil() set and datatype() set to 'history'
+# Throws  : 'eutil not defined' when no true -eutil value is supplied
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($eutil) = $self->_rearrange([qw(eutil)], @args);
+    $eutil || $self->throw('eutil not defined');
+    $self->eutil($eutil);
+    $self->datatype('history');
+    return $self;
+}
+
+
+
+
+# Private EUtilDataI method: load WebEnv/QueryKey from parsed history data.
+#
+# Args    : $simple - hash ref that must contain 'WebEnv' and 'QueryKey'
+# Throws  : "Missing webenv/query key in history output" when either key
+#           is absent
+# Note    : a true value is moved out of $simple (the key is deleted); a
+#           false value is copied and its key is left in place
+sub _add_data {
+    my ($self, $simple) = @_;
+    if (!exists $simple->{WebEnv} || !exists $simple->{QueryKey}) {
+        # Data::Dumper is not loaded anywhere else in this module; load it
+        # here so the debug dump cannot abort before the real error is
+        # thrown.
+        require Data::Dumper;
+        $self->debug("Data:", Data::Dumper::Dumper($simple));
+        $self->throw("Missing webenv/query key in history output");
+    }
+    $self->{_webenv}   = $simple->{WebEnv}   && delete $simple->{WebEnv};
+    $self->{_querykey} = $simple->{QueryKey} && delete $simple->{QueryKey};
+}
+
+
+# Render the history as text: one "label:value" line each for the webenv
+# and the query key, with the label left-justified in 20 columns.
+sub to_string {
+    my $self = shift;
+    my @rows = (
+        [ 'WebEnv' => 'get_webenv'    ],
+        [ 'Key'    => 'get_query_key' ],
+    );
+    my $string;
+    for my $row (@rows) {
+        my ($label, $getter) = @{$row};
+        $string .= sprintf("%-20s:%s\n", $label, $self->$getter);
+    }
+    return $string;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::History - Lightweight implementation of HistoryI interface (not bound to filehandles, extraneous methods, etc).
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ #should work for any class which is-a HistoryI
+
+ if ($obj->has_History) {
+ # do something here
+ }
+
+ ($webenv, $querykey) = $obj->history;
+
+ $obj->history($webenv, $querykey);
+
+ $webenv = $obj->get_webenv;
+
+ $query_key = $obj->get_query_key;
+
+=head1 DESCRIPTION
+
+This class extends any EUtilDataI implementation with methods that allow
+instances to deal with NCBI history data (WebEnv and query_key). These can be used as
+parameters for further queries against data sets stored on the NCBI server, much
+like NCBI's Entrez search history. These are important when one wants to run
+complex queries using esearch, retrieve related data using elink, and retrieve
+large datasets using epost/efetch.
+
+This class is the simplest implementation and merely holds data for future
+queries from any HistoryI. See also Bio::Tools::EUtilities::Query (esearch) and
+Bio::Tools::EUtilities::LinkSet (elink), which also implement HistoryI.
+
+=head2 history
+
+ Title : history
+ Usage : my ($webenv, $qk) = $hist->history
+ Function : Get/Set two-element list of webenv() and query_key()
+ Returns : array
+ Args : two-element list of webenv, query key
+
+=head2 get_webenv
+
+ Title : get_webenv
+ Usage : my $webenv = $hist->get_webenv
+ Function : returns web environment key needed to retrieve results from
+ NCBI server
+ Returns : string (encoded key)
+ Args : none
+
+=head2 get_query_key
+
+ Title : get_query_key
+ Usage : my $qk = $hist->get_query_key
+ Function : returns query key (integer) for the history number for this session
+ Returns : integer
+ Args : none
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) simple data for text formatting
+ Note : Used generally for debugging and for the print_* methods
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/%%7Bdist%7D
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/HistoryI.pm b/lib/Bio/Tools/EUtilities/HistoryI.pm
new file mode 100644
index 0000000..f943dd4
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/HistoryI.pm
@@ -0,0 +1,150 @@
+package Bio::Tools::EUtilities::HistoryI;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::HistoryI::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Tools::EUtilities::EUtilDataI);
+
+# ABSTRACT: Simple extension of EUtilDataI interface class for classes which hold NCBI server history data.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+# Get/set the (webenv, query key) pair.
+#
+# Args    : optional two-element list (webenv, query key)
+# Returns : list (get_webenv, get_query_key)
+# Throws  : "Missing part of cookie!" when arguments are given but either
+#           of the first two is false
+# Note    : calls parse_data() first when the object can parse_data and
+#           data_parsed() is false
+sub history {
+    my ($self, @cookie) = @_;
+    if ($self->can('parse_data') && !$self->data_parsed) {
+        $self->parse_data;
+    }
+    if (@cookie) {
+        my ($webenv, $querykey) = @cookie;
+        $self->throw("Missing part of cookie!") unless ($webenv && $querykey);
+        @{$self}{ '_webenv', '_querykey' } = ($webenv, $querykey);
+    }
+    return ($self->get_webenv, $self->get_query_key);
+}
+
+
+# Returns the stored web environment string (the '_webenv' slot).
+# Calls parse_data() first when the object can parse_data and
+# data_parsed() is false.
+sub get_webenv {
+    my ($self) = @_;
+    if ($self->can('parse_data') && !$self->data_parsed) {
+        $self->parse_data;
+    }
+    return $self->{'_webenv'};
+}
+
+
+# Returns the stored query key (the '_querykey' slot).
+# Calls parse_data() first when the object can parse_data and
+# data_parsed() is false.
+sub get_query_key {
+    my ($self) = @_;
+    if ($self->can('parse_data') && !$self->data_parsed) {
+        $self->parse_data;
+    }
+    return $self->{'_querykey'};
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::HistoryI - Simple extension of EUtilDataI interface class for classes which hold NCBI server history data.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ #should work for any class which is-a HistoryI
+
+ if ($obj->has_History) {
+ # do something here
+ }
+
+ ($webenv, $querykey) = $obj->history;
+
+ $obj->history($webenv, $querykey);
+
+ $webenv = $obj->get_webenv;
+
+ $query_key = $obj->get_query_key;
+
+=head1 DESCRIPTION
+
+This class extends any EUtilDataI implementation with methods that allow
+instances to deal with NCBI history data (WebEnv and query_key). These can be used as
+parameters for further queries against data sets stored on the NCBI server, much
+like NCBI's Entrez search history. These are important when one wants to run
+complex queries using esearch, retrieve related data using elink, and retrieve
+large datasets using epost/efetch.
+
+The simplest implementation is Bio::Tools::EUtilities::History, which holds the
+history data for epost. See also Bio::Tools::EUtilities::Query (esearch) and
+Bio::Tools::EUtilities::LinkSet (elink), which also implement HistoryI.
+
+=head2 history
+
+ Title : history
+ Usage : my ($webenv, $qk) = $hist->history
+ Function : Get/Set two-element list of webenv() and query_key()
+ Returns : array
+ Args : two-element list of webenv, querykey
+
+=head2 get_webenv
+
+ Title : get_webenv
+ Usage : my $webenv = $hist->get_webenv
+ Function : returns web environment key needed to retrieve results from
+ NCBI server
+ Returns : string (encoded key)
+ Args : none
+
+=head2 get_query_key
+
+ Title : get_query_key
+ Usage : my $qk = $hist->get_query_key
+ Function : returns query key (integer) for the history number for this session
+ Returns : integer
+ Args : none
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/%%7Bdist%7D
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Info.pm b/lib/Bio/Tools/EUtilities/Info.pm
new file mode 100644
index 0000000..4302e34
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Info.pm
@@ -0,0 +1,231 @@
+package Bio::Tools::EUtilities::Info;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Info::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Tools::EUtilities Bio::Tools::EUtilities::EUtilDataI);
+use Bio::Tools::EUtilities::Info::LinkInfo;
+use Bio::Tools::EUtilities::Info::FieldInfo;
+
+# ABSTRACT: Interface class for storing einfo data.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+
+# private EUtilDataI method
+
+# Private EUtilDataI method: load parsed einfo data into this object.
+#
+# Args    : $simple - hash ref of parsed einfo output
+# Returns : nothing
+# Note    : - DbList/DbName is stored under '_available_databases'
+#           - with a DbInfo section, each FieldList/LinkList entry becomes
+#             a FieldInfo/LinkInfo object pushed onto '_fieldinfo' or
+#             '_linkinfo'; every other DbInfo key is stored as
+#             '_' . lc(key)
+#           - without DbInfo, every non-reference top-level value is
+#             stored as '_' . lc(key)
+sub _add_data {
+    my ($self, $simple) = @_;
+    if (exists $simple->{DbList} &&
+        exists $simple->{DbList}->{DbName}) {
+        $self->{'_available_databases'} = $simple->{DbList}->{DbName};
+    }
+
+    if (!exists $simple->{DbInfo}) {
+        # Plain loop rather than map in void context: this runs only for
+        # its side effects on $self.
+        for my $key (keys %$simple) {
+            next if ref $simple->{$key};
+            $self->{'_'.lc $key} = $simple->{$key};
+        }
+        return;
+    }
+
+    # start setting internal variables
+    my $dbinfo = $simple->{DbInfo};
+    # list key => [ element holding the entries, object class suffix ]
+    my %list_spec = (
+        FieldList => [ 'Field', 'FieldInfo' ],
+        LinkList  => [ 'Link',  'LinkInfo'  ],
+    );
+    for my $key (sort keys %{$dbinfo}) {
+        my $spec = $list_spec{$key};
+        if (!$spec) {
+            $self->{'_'.lc $key} = $dbinfo->{$key};
+            next;
+        }
+        my ($element, $type) = @{$spec};
+        # Copy the reference into a local first so that iterating over a
+        # missing list cannot autovivify an entry inside the caller's data.
+        my $chunks = $dbinfo->{$key}->{$element};
+        for my $chunk (@{$chunks}) {
+            # Tag each entry with the database it was reported for.
+            if (exists $dbinfo->{DbName}) {
+                $chunk->{DbFrom} = $dbinfo->{DbName};
+            }
+            my $obj = "Bio::Tools::EUtilities::Info::$type"->new(
+                -eutil   => 'einfo',
+                -type    => lc $type,
+                -verbose => $self->verbose);
+            $obj->_add_data($chunk);
+            push @{ $self->{'_'.lc $type} }, $obj;
+        }
+    }
+    return;
+}
+
+
+# Render this object as text.
+#
+# Returns : string: the parent class's to_string() output, then a
+#           wrapped "DB" line when get_databases() returns anything, then
+#           the to_string() of each object returned by next_FieldInfo()
+#           and next_LinkInfo()
+# Note    : both next_* methods are called until they return false
+sub to_string {
+    my $self = shift;
+    my $string = $self->SUPER::to_string;
+    if (my @dbs = $self->get_databases) {
+        $string .= sprintf("%-20s:%s\n\n", 'DB',
+            $self->_text_wrap('', ' 'x20 .':', join(', ', @dbs)));
+    }
+    while (my $fi = $self->next_FieldInfo) {
+        $string .= $fi->to_string."\n";
+    }
+    while (my $li = $self->next_LinkInfo) {
+        $string .= $li->to_string."\n";
+    }
+    return $string;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Info - Interface class for storing einfo data.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ #### should not create instance directly; Bio::Tools::EUtilities does this ####
+
+ my $info = Bio::Tools::EUtilities->new(-eutil => 'einfo',
+ -file => 'einfo.xml');
+ # can also use '-response' (for HTTP::Response objects) or '-fh' (for filehandles)
+
+ # print available databases (if data is present)
+
+ print join(', ',$info->get_available_databases),"\n";
+
+ # get database info
+
+ my $db = $info->get_database; # in case you forgot...
+ my $desc = $info->get_description;
+ my $nm = $info->get_menu_name;
+ my $ct = $info->get_record_count;
+ my $dt = $info->get_last_update;
+
+ # EUtilDataI interface methods
+
+ my $eutil = $info->eutil;
+ my $type = $info->datatype;
+
+ # iterate through Field and Link objects
+
+ while (my $field = $info->next_Field) {
+ print "Field code: ",$field->get_field_code,"\n";
+ print "Field name: ",$field->get_field_name,"\n";
+ print "Field desc: ",$field->get_field_description,"\n";
+ print "DB : ",$field->get_database,"\n";
+ print "Term ct : ",$field->get_term_count,"\n";
+ for my $att (qw(is_date is_singletoken is_hierarchy is_hidden is_numerical)) {
+ print "\tField $att\n" if $field->$att;
+ }
+ }
+
+ my @fields = $info->get_Fields; # grab them all (useful for grep)
+
+ while (my $link = $info->next_LinkInfo) {
+ print "Link name: ",$link->get_link_name,"\n";
+ print "Link desc: ",$link->get_link_description,"\n";
+ print "DBFrom: ",$link->get_dbfrom,"\n"; # same as get_database()
+ print "DBTo: ",$link->get_dbto,"\n"; # database linked to
+ }
+
+ my @links = $info->get_LinkInfo; # grab them all (useful for grep)
+
+ $info->rewind(); # rewinds all iterators
+ $info->rewind('links'); # rewinds Link iterator
+ $info->rewind('fields'); # rewinds Field iterator
+
+=head1 DESCRIPTION
+
+This class handles data output (XML) from einfo.
+
+Einfo is capable of returning two types of information:
+
+=over 3
+
+=item * A list of all available databases (when called w/o parameters)
+
+=item * Information about a specific database.
+
+=back
+
+The latter information includes the database description, record count, and
+date/time stamp for the last update, among other things. It also includes a list
+of fields (indices by which record data is stored which can be used in queries)
+and links (crossrefs between related records in other databases at NCBI). Data
+from the latter two are stored in two small subclasses (FieldInfo and LinkInfo)
+which can be iterated through or retrieved all at once, as demonstrated above.
+NOTE: Methods described for the LinkInfo and FieldInfo subclasses are unique to
+those classes (as they retrieve data unique to those data types).
+
+Further documentation for Link and Field subclass methods is included below.
+
+For more information on einfo see:
+
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/einfo_help.html
+
+=head2 rewind
+
+ Title : rewind
+ Usage : $info->rewind() # rewinds all (default)
+ $info->rewind('links') # rewinds only links
+ Function : 'rewinds' (resets) specified iterators (all if no arg)
+ Returns : none
+ Args : [OPTIONAL] String:
+ 'all' - all iterators (default)
+ 'linkinfo' - LinkInfo objects only
+ 'fieldinfo' - FieldInfo objects only
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) simple data for text formatting
+ Note : Used generally for debugging and for various print methods
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/%%7Bdist%7D
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Info/FieldInfo.pm b/lib/Bio/Tools/EUtilities/Info/FieldInfo.pm
new file mode 100644
index 0000000..86c0f3c
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Info/FieldInfo.pm
@@ -0,0 +1,292 @@
+package Bio::Tools::EUtilities::Info::FieldInfo;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Info::FieldInfo::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Root::Root Bio::Tools::EUtilities::EUtilDataI);
+
+# ABSTRACT: Class for storing einfo field data.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+# Constructor: forwards all arguments to the parent constructor, then sets
+# eutil() to 'einfo' and datatype() to 'fieldinfo'.
+sub new {
+    my $class = shift;
+    my $self  = $class->SUPER::new(@_);
+    $self->eutil('einfo');
+    $self->datatype('fieldinfo');
+    return $self;
+}
+
+
+# Returns the value stored in the '_termcount' slot.
+sub get_term_count {
+    my $self = shift;
+    return $self->{'_termcount'};
+}
+
+
+# Returns the value stored in the '_fullname' slot.
+sub get_field_name {
+    my $self = shift;
+    return $self->{'_fullname'};
+}
+
+
+# get_full_name() is the same code reference as get_field_name().
+*get_full_name = \&get_field_name;
+
+
+# Returns the value stored in the '_name' slot.
+sub get_field_code {
+    my $self = shift;
+    return $self->{'_name'};
+}
+
+
+# Returns the value stored in the '_description' slot.
+sub get_field_description {
+    my $self = shift;
+    return $self->{'_description'};
+}
+
+
+# Returns 1 when the '_isdate' slot holds 'Y', otherwise 0.
+sub is_date {
+    my $self = shift;
+    my $flag = $self->{'_isdate'};
+    return 1 if $flag && $flag eq 'Y';
+    return 0;
+}
+
+
+# Returns 1 when the '_singletoken' slot holds 'Y', otherwise 0.
+sub is_singletoken {
+    my $self = shift;
+    my $flag = $self->{'_singletoken'};
+    return 1 if $flag && $flag eq 'Y';
+    return 0;
+}
+
+
+# Returns 1 when the '_hierarchy' slot holds 'Y', otherwise 0.
+#
+# The slot name carries the leading underscore that _add_data() puts on
+# every key it stores ('_' . lc(key)), in line with the other is_*
+# methods of this class.
+sub is_hierarchy {
+    my $self = shift;
+    return ($self->{'_hierarchy'} && $self->{'_hierarchy'} eq 'Y') ? 1 : 0;
+}
+
+
+# Returns 1 when the '_ishidden' slot holds 'Y', otherwise 0.
+sub is_hidden {
+    my $self = shift;
+    my $flag = $self->{'_ishidden'};
+    return 1 if $flag && $flag eq 'Y';
+    return 0;
+}
+
+
+# Returns 1 when the '_isnumerical' slot holds 'Y', otherwise 0.
+sub is_numerical {
+    my $self = shift;
+    my $flag = $self->{'_isnumerical'};
+    return 1 if $flag && $flag eq 'Y';
+    return 0;
+}
+
+# private EUtilDataI method
+
+# Private EUtilDataI method: copy every non-reference value of $simple
+# into this object under the key '_' . lc(original key).
+#
+# Args    : $simple - hash ref of parsed field data
+# Returns : nothing
+sub _add_data {
+    my ($self, $simple) = @_;
+    # Plain loop rather than map in void context: this runs only for its
+    # side effects on $self.
+    for my $key (keys %$simple) {
+        next if ref $simple->{$key};
+        $self->{'_'.lc $key} = $simple->{$key};
+    }
+    return;
+}
+
+
+# Render this field as text: one wrapped line each for the field code,
+# field name, description and term count, then an "Attributes" line listing
+# the names of the is_* methods that return true.
+sub to_string {
+    my $self = shift;
+    # label => getter, in output order
+    my @rows = (
+        [ 'Field Code'  => 'get_field_code'        ],
+        [ 'Field Name'  => 'get_field_name'        ],
+        [ 'Description' => 'get_field_description' ],
+        [ 'Term Count'  => 'get_term_count'        ],
+    );
+    my $indent = (' ' x 20) . ':';
+    my $string;
+    for my $row (@rows) {
+        my ($label, $getter) = @{$row};
+        $string .= sprintf("%-20s%s\n", $label,
+            $self->_text_wrap('', $indent, ":".$self->$getter));
+    }
+    my @flags = grep { $self->$_ } qw(is_date
+        is_singletoken is_hierarchy is_hidden is_numerical);
+    $string .= sprintf("%-20s%s\n", "Attributes",
+        $self->_text_wrap('', $indent, ":".join(',', @flags)));
+    return $string;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Info::FieldInfo - Class for storing einfo field data.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ #### should not create instance directly; Bio::Tools::EUtilities does this ####
+
+ my $info = Bio::Tools::EUtilities->new(-eutil => 'einfo',
+ -file => 'einfo.xml');
+ # can also use '-response' (for HTTP::Response objects) or '-fh' (for filehandles)
+
+ # print available databases (if data is present)
+ print join(', ',$info->get_available_databases),"\n";
+
+ # get database info
+ my $db = $info->get_database; # in case you forgot...
+ my $desc = $info->get_description;
+ my $nm = $info->get_menu_name;
+ my $ct = $info->get_record_count;
+ my $dt = $info->get_last_update;
+ # EUtilDataI interface methods
+ my $eutil = $info->eutil;
+ my $type = $info->type;
+
+ # iterate through Field and Link objects
+ while (my $field = $info->next_Field) {
+ print "Field code: ",$field->get_field_code,"\n";
+ print "Field name: ",$field->get_field_name,"\n";
+ print "Field desc: ",$field->get_field_description,"\n";
+ print "DB : ",$field->get_database,"\n";
+ print "Term ct : ",$field->get_term_count,"\n";
+ for my $att (qw(is_date is_singletoken is_hierarchy is_hidden is_numerical)) {
+ print "\tField $att\n" if $field->$att;
+ }
+ }
+
+ my @fields = $info->get_Fields; # grab them all (useful for grep)
+
+ $info->rewind('fields'); # rewinds Field iterator
+
+=head1 DESCRIPTION
+
+This class handles simple field data output (XML) from einfo.
+
+For more information on einfo see:
+
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/einfo_help.html
+
+=head2 new
+
+ Title : new
+ Note : *** should not be called by end-users ***
+ Usage : my $ct = Bio::Tools::EUtilities::Info::FieldInfo;
+ Function : returns new FieldInfo instance
+ Returns : Bio::Tools::EUtilities::Info::FieldInfo instance
+ Args : none (all data added via _add_data, most methods are getters only)
+
+=head2 get_term_count
+
+ Title : get_term_count
+ Usage : my $ct = $field->get_term_count;
+ Function : returns number of terms for field
+ Returns : integer
+ Args : none
+
+=head2 get_field_name
+
+ Title : get_field_name
+ Usage : my $nm = $field->get_field_name;
+ Function : returns the full name of the field
+ Returns : string
+ Args : none
+
+=head2 get_full_name
+
+ Title : get_full_name
+ Note : alias of get_field_name()
+
+=head2 get_field_code
+
+ Title : get_field_code
+ Usage : $field->get_field_code()
+ Function : returns field code (abbreviation) used for queries
+ Returns : string
+ Args : none
+
+=head2 get_field_description
+
+ Title : get_field_description
+ Usage : $field->get_field_description
+ Function : returns field description
+ Returns : string
+ Args : none
+ Note : alias of get_description()
+
+=head2 is_date
+
+ Title : is_date
+ Usage : if ($field->is_date) {...}
+ Function : returns true if field contains date information
+ Returns : Boolean
+ Args : none
+
+=head2 is_singletoken
+
+ Title : is_singletoken
+ Usage : if ($field->is_singletoken) {...}
+ Function : returns true if field has single value in docsums
+ Returns : Boolean
+ Args : none
+
+=head2 is_hierarchy
+
+ Title : is_hierarchy
+ Usage : if ($field->is_hierarchy) {...}
+ Function : returns true if field contains hierarchical values
+ Returns : Boolean
+ Args : none
+
+=head2 is_hidden
+
+ Title : is_hidden
+ Usage : if ($field->is_hidden) {...}
+ Function : returns true if field is hidden in docsums
+ Returns : Boolean
+ Args : none
+
+=head2 is_numerical
+
+ Title : is_numerical
+ Usage : if ($field->is_numerical) {...}
+ Function : returns true if field contains a numerical value
+ Returns : Boolean
+ Args : none
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) simple data for text formatting
+ Note : Used generally for debugging and for various print methods
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/%%7Bdist%7D
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Info/LinkInfo.pm b/lib/Bio/Tools/EUtilities/Info/LinkInfo.pm
new file mode 100644
index 0000000..bb7945a
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Info/LinkInfo.pm
@@ -0,0 +1,280 @@
+package Bio::Tools::EUtilities::Info::LinkInfo;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Info::LinkInfo::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Root::Root Bio::Tools::EUtilities::EUtilDataI);
+
+# ABSTRACT: Class for storing einfo link data.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+# Constructor (not meant to be called by end users). Honours an optional
+# -eutil argument, defaulting to 'einfo', and always sets the datatype to
+# 'linkinfo'.
+sub new {
+    my $class  = shift;
+    my @params = @_;
+    my $self   = $class->SUPER::new(@params);
+    # _rearrange is deliberately evaluated in scalar context, as before.
+    my $eutil = $self->_rearrange([qw(EUTIL)], @params) || 'einfo';
+    $self->eutil($eutil);
+    $self->datatype('linkinfo');
+    return $self;
+}
+
+
+# Returns the value stored under '_dbto'.
+sub get_database {
+    my ($self) = @_;
+    return $self->{'_dbto'};
+}
+
+
+# Alias for get_database().
+sub get_db {
+    my $self = shift;
+    return $self->get_database;
+}
+
+
+# Alias for get_database().
+sub get_dbto {
+    my $self = shift;
+    return $self->get_database;
+}
+
+
+# Returns the value stored under '_dbfrom' (the referring database).
+sub get_dbfrom {
+    my ($self) = @_;
+    return $self->{'_dbfrom'};
+}
+
+
+# Returns the raw link name. Data added for elink is read from '_linkname',
+# anything else (einfo) from '_name'.
+sub get_link_name {
+    my ($self) = @_;
+    my $slot = $self->eutil eq 'elink' ? '_linkname' : '_name';
+    return $self->{$slot};
+}
+
+
+# Returns the value stored under '_description'.
+sub get_link_description {
+    my ($self) = @_;
+    return $self->{'_description'};
+}
+
+
+# Returns the menu name: '_menutag' when the eutil is 'elink', otherwise
+# '_menu'.
+sub get_link_menu_name {
+    my ($self) = @_;
+    if ($self->eutil eq 'elink') {
+        return $self->{'_menutag'};
+    }
+    return $self->{'_menu'};
+}
+
+
+# Returns the value stored under '_priority'.
+sub get_priority {
+    my ($self) = @_;
+    return $self->{'_priority'};
+}
+
+
+# Returns the value stored under '_htmltag'.
+sub get_html_tag {
+    my ($self) = @_;
+    return $self->{'_htmltag'};
+}
+
+
+# Returns the value stored under '_url'.
+sub get_url {
+    my ($self) = @_;
+    return $self->{'_url'};
+}
+
+# private method
+
+# Copies every non-reference value of the hash ref $simple into the object,
+# storing it under '_' followed by the lower-cased key. Values that are
+# references (nested structures) are skipped. Returns nothing.
+sub _add_data {
+    my ($self, $simple) = @_;
+    for my $key (keys %$simple) {
+        next if ref $simple->{$key};
+        $self->{'_' . lc $key} = $simple->{$key};
+    }
+    return;
+}
+
+
+# Renders the object as text, one "Label :value" line per populated field.
+# The optional argument is the indentation width (default 0); labels are
+# padded to 20 columns minus that width. Fields with a false value are
+# omitted. Returns the accumulated string.
+sub to_string {
+    my ($self, $level) = @_;
+    $level ||= 0;
+    my $pad = 20 - $level;
+    # [accessor method, printed label], in output order
+    my @fields = (
+        [ 'get_link_name'        => 'Link Name' ],
+        [ 'get_link_description' => 'Description' ],
+        [ 'get_dbfrom'           => 'DB From' ],
+        [ 'get_dbto'             => 'DB To' ],
+        [ 'get_link_menu_name'   => 'Menu Name' ],
+        [ 'get_priority'         => 'Priority' ],
+        [ 'get_html_tag'         => 'HTML Tag' ],
+        [ 'get_url'              => 'URL' ],
+    );
+    my $string = '';
+    for my $field (@fields) {
+        my ($method, $label) = @$field;
+        my $content = $self->$method();
+        next unless $content;
+        $string .= sprintf("%-*s%-*s%s\n",
+            $level, '',
+            $pad, $label,
+            $self->_text_wrap(':', ' ' x ($pad).':', $content));
+    }
+    return $string;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Info::LinkInfo - Class for storing einfo link data.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ ## should not create instance directly; Bio::Tools::EUtilities does this ##
+
+ # get a LinkInfo object using Bio::Tools::EUtilities
+ print "Link name: ",$link->get_link_name,"\n";
+ print "Menu name: ",$link->get_link_menu_name,"\n";
+ print "Link desc: ",$link->get_link_description,"\n";
+ print "DBFrom: ",$link->get_dbfrom,"\n"; # database linked from
+ print "DBTo: ",$link->get_dbto,"\n"; # database linked to
+
+=head1 DESCRIPTION
+
+This class handles data output (XML) from both einfo and elink, and centers on
+describing data that either describes how NCBI databases are linked together
+via link names, or how databases are linked to outside databases (LinkOut).
+
+Further documentation for Link and Field subclass methods is included below.
+
+For more information on einfo see:
+
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/einfo_help.html
+
+=head2 new
+
+ Title : new
+ Note : *** should not be called by end-users ***
+ Usage : my $ct = Bio::Tools::EUtilities::Info::LinkInfo->new;
+ Function : returns new LinkInfo instance
+ Returns : Bio::Tools::EUtilities::Info::LinkInfo instance
+ Args : none (all data added via _add_data, most methods are getters only)
+
+=head2 get_database
+
+ Title : get_database
+ Usage : my $db = $info->get_database;
+ Function : returns single database name (eutil-compatible). This is the
+ queried database. For elinks (which have 'db' and 'dbfrom')
+ this is equivalent to db/dbto (use get_dbfrom() for the latter)
+ Returns : string
+ Args : none
+
+=head2 get_db (alias for get_database)
+
+=head2 get_dbto (alias for get_database)
+
+=head2 get_dbfrom
+
+ Title : get_dbfrom
+ Usage : my $origdb = $link->get_dbfrom;
+ Function : returns referring database
+ Returns : string
+ Args : none
+ Note :
+
+=head2 get_link_name
+
+ Title : get_link_name
+ Usage : $ln = $link->get_link_name;
+ Function : returns raw link name (eutil-compatible)
+ Returns : string
+ Args : none
+
+=head2 get_link_description
+
+ Title : get_link_description
+ Usage : $desc = $link->get_link_description;
+ Function : returns the (more detailed) link description
+ Returns : string
+ Args : none
+
+=head2 get_link_menu_name
+
+ Title : get_link_menu_name
+ Usage : my $mn = $link->get_link_menu_name;
+ Function : returns formal menu name
+ Returns : string
+ Args : none
+
+=head2 get_priority
+
+ Title : get_priority
+ Usage : my $mn = $link->get_priority;
+ Function : returns priority ranking
+ Returns : integer
+ Args : none
+ Note : only set when using elink and cmd set to 'acheck'
+
+=head2 get_html_tag
+
+ Title : get_html_tag
+ Usage : my $tag = $link->get_html_tag;
+ Function : returns HTML tag
+ Returns : string
+ Args : none
+ Note : only set when using elink and cmd set to 'acheck'
+
+=head2 get_url
+
+ Title : get_url
+ Usage : my $url = $link->get_url;
+ Function : returns URL string; note that the string isn't usable directly but
+ has the ID replaced with the tag <@UID@>
+ Returns : string
+ Args : none
+ Note : only set when using elink and cmd set to 'acheck'
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) integer indentation level for text formatting (default 0)
+ Note : Used generally for debugging and for various print methods
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Link.pm b/lib/Bio/Tools/EUtilities/Link.pm
new file mode 100644
index 0000000..e694a88
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Link.pm
@@ -0,0 +1,162 @@
+package Bio::Tools::EUtilities::Link;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Link::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Tools::EUtilities Bio::Tools::EUtilities::EUtilDataI);
+use Bio::Tools::EUtilities::Link::LinkSet;
+
+# ABSTRACT: General API for accessing data retrieved from elink queries.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+# private EUtilDataI method
+
+{
+    # Maps the LinkSet child element found in the parsed data to the datatype
+    # given to each LinkSet object built from it.
+    my %SUBCLASS = (
+        'LinkSetDb'        => 'dblink',
+        'LinkSetDbHistory' => 'history',
+        'IdUrlList'        => 'urllink',
+        'IdCheckList'      => 'idcheck',
+        'NoLinks'          => 'nolinks',
+    );
+
+    # Splits the parsed elink data in $data into
+    # Bio::Tools::EUtilities::Link::LinkSet objects. Every object is pushed
+    # onto '_linksets'; those built from LinkSetDbHistory are also pushed onto
+    # '_histories'. Warns and returns if no LinkSet key is present; a linkset
+    # carrying an ERROR key is reported via warn() and skipped.
+    sub _add_data {
+        my ($self, $data) = @_;
+        unless (exists $data->{LinkSet}) {
+            $self->warn("No linksets returned");
+            return;
+        }
+        LINKSET:
+        for my $ls (@{ $data->{LinkSet} }) {
+            # report linkset errors, but try to save the rest of the data
+            if (exists $ls->{ERROR}) {
+                my $error  = $ls->{ERROR};
+                my $dbfrom = $ls->{DbFrom};
+                $self->warn("NCBI LinkSet error: $dbfrom: $error\n");
+                next LINKSET;
+            }
+            # the element type is detected once, on the first usable linkset,
+            # and then reused for all later ones
+            my $subclass;
+            if (exists $self->{'_subclass_type'}) {
+                $subclass = $self->{'_subclass_type'};
+            }
+            else {
+                ($subclass) = grep { exists $ls->{$_} }
+                    qw(LinkSetDb LinkSetDbHistory IdUrlList IdCheckList);
+                $subclass ||= 'NoLinks';
+                $self->{'_subclass_type'} = $subclass;
+            }
+            # split these up by ID, since using correspondence() clobbers them...
+            if ($subclass eq 'IdUrlList' || $subclass eq 'IdCheckList') {
+                my $list;
+                if ($subclass eq 'IdUrlList') {
+                    $list = 'IdUrlSet';
+                }
+                elsif (exists $ls->{$subclass}->{IdLinkSet}) {
+                    $list = 'IdLinkSet';
+                }
+                else {
+                    $list = 'Id';
+                }
+                $ls->{$subclass} = $ls->{$subclass}->{$list};
+            }
+            # one LinkSet object per sub-element
+            for my $ls_sub (@{ $ls->{$subclass} }) {
+                # propagate the shared keys of the parent linkset
+                for my $key (qw(WebEnv DbFrom IdList)) {
+                    next unless exists $ls->{$key};
+                    $ls_sub->{$key} = $ls->{$key};
+                }
+                my $obj = Bio::Tools::EUtilities::Link::LinkSet->new(
+                    -eutil    => 'elink',
+                    -datatype => $SUBCLASS{$subclass},
+                    -verbose  => $self->verbose);
+                $obj->_add_data($ls_sub);
+                push @{ $self->{'_linksets'} }, $obj;
+                # push only potential history-carrying objects into history queue
+                push @{ $self->{'_histories'} }, $obj
+                    if $subclass eq 'LinkSetDbHistory';
+            }
+        }
+    }
+
+}
+
+
+# Returns the parent class's to_string() output followed by the to_string()
+# output of every LinkSet yielded by next_LinkSet.
+sub to_string {
+    my ($self) = @_;
+    my $text = $self->SUPER::to_string;
+    while (my $linkset = $self->next_LinkSet) {
+        $text .= $linkset->to_string;
+    }
+    return $text;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Link - General API for accessing data retrieved from elink queries.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ ...TODO
+
+=head1 DESCRIPTION
+
+Bio::Tools::EUtilities::Link is a loadable plugin for Bio::Tools::EUtilities
+that specifically handles NCBI elink-related data.
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : none
+ Note : Used generally for debugging and for various print methods
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Link/LinkSet.pm b/lib/Bio/Tools/EUtilities/Link/LinkSet.pm
new file mode 100644
index 0000000..0c9fad8
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Link/LinkSet.pm
@@ -0,0 +1,571 @@
+package Bio::Tools::EUtilities::Link::LinkSet;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Link::LinkSet::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Root::Root Bio::Tools::EUtilities::HistoryI);
+use Bio::Tools::EUtilities::Link::UrlLink;
+use Bio::Tools::EUtilities::Info::LinkInfo;
+
+# ABSTRACT: Class for EUtils LinkSets.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+# Constructor. Accepts an optional -datatype argument (default 'linkset');
+# the eutil is always set to 'elink'.
+sub new {
+    # '@args' had been mangled to ' at args' by e-mail address obfuscation
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    my ($type) = $self->_rearrange([qw(DATATYPE)], @args);
+    $type ||= 'linkset';
+    $self->eutil('elink');
+    $self->datatype($type);
+    return $self;
+}
+
+
+# Returns the keys of '_id' ordered by the first element of each entry (the
+# position recorded when the IDs were added). The sorted list is cached in
+# '_sorted_id' on first use.
+sub get_ids {
+    my ($self) = @_;
+    if (!$self->{'_sorted_id'}) {
+        my @ordered = sort {
+            $self->{'_id'}->{$a}->[0] <=> $self->{'_id'}->{$b}->[0]
+        } keys %{ $self->{'_id'} };
+        $self->{'_sorted_id'} = \@ordered;
+    }
+    return @{ $self->{'_sorted_id'} };
+}
+
+
+# Returns only the first entry of get_databases().
+sub get_database {
+    my ($self) = @_;
+    return ($self->get_databases)[0];
+}
+
+
+# Alias for get_database().
+sub get_db {
+    my $self = shift;
+    return $self->get_database;
+}
+
+
+# Alias for get_database().
+sub get_dbto {
+    my $self = shift;
+    return $self->get_database;
+}
+
+
+# Returns the database names this linkset refers to: the sorted names taken
+# from the LinkInfo objects (one LinkInfo per distinct name), with '_dbto'
+# placed first when it is set and was not already seen.
+sub get_databases {
+    my ($self) = @_;
+    my %seen;
+    my @unique_infos = grep { !$seen{ $_->get_database }++ } $self->get_LinkInfo;
+    my @names = sort map { $_->get_database } @unique_infos;
+    my $dbto = $self->{'_dbto'};
+    if ($dbto && !$seen{$dbto}++) {
+        unshift @names, $dbto;
+    }
+    return @names;
+}
+
+
+# Alias for get_databases().
+sub get_dbs {
+    my $self = shift;
+    return $self->get_databases;
+}
+
+
+# Returns the value stored under '_dbfrom' (the originating database).
+sub get_dbfrom {
+    my ($self) = @_;
+    return $self->{'_dbfrom'};
+}
+
+
+# Returns the link names known to this linkset: the linkset's own
+# '_linkname' (if set) followed by the names of its LinkInfo objects.
+# Undefined names are dropped and each name is returned once, in order of
+# first appearance, as the POD for this method promises. The original
+# filled a "seen" hash but never filtered with it.
+sub get_link_names {
+    my ($self) = @_;
+    my @names;
+    push @names, $self->{'_linkname'} if $self->{'_linkname'};
+    push @names, map { $_->get_link_name } $self->get_LinkInfo;
+    my %seen;
+    return grep { defined $_ && !$seen{$_}++ } @names;
+}
+
+
+# Returns only the first entry of get_link_names().
+sub get_link_name {
+    my ($self) = @_;
+    return ($self->get_link_names)[0];
+}
+
+
+# Returns the submitted IDs. For the 'idcheck' and 'urllink' datatypes this
+# is the same list get_ids() returns; otherwise it is the content of
+# '_submitted_ids', or an empty list when none were stored.
+sub get_submitted_ids {
+    my ($self) = @_;
+    my $datatype = $self->datatype;
+    return $self->get_ids
+        if $datatype eq 'idcheck' || $datatype eq 'urllink';
+    return @{ $self->{'_submitted_ids'} } if $self->{'_submitted_ids'};
+    return ();
+}
+
+
+# Returns 1 if the '_has_scores' key exists on the object, otherwise 0.
+sub has_scores {
+    my ($self) = @_;
+    if (exists $self->{'_has_scores'}) {
+        return 1;
+    }
+    return 0;
+}
+
+
+# Returns a flat list of ID => score pairs (the second element of each
+# '_id' entry). Returns nothing when the linkset has no scores. The mapping
+# is rebuilt on every call rather than cached.
+sub get_scores {
+    my ($self) = @_;
+    return unless $self->has_scores;
+    my %scores;
+    for my $id (keys %{ $self->{'_id'} }) {
+        $scores{$id} = $self->{'_id'}->{$id}->[1];
+    }
+    return %scores;
+}
+
+
+# Returns the score stored for $id (second element of its '_id' entry).
+# Returns nothing if $id is false or unknown.
+sub get_score_by_id {
+    my ($self, $id) = @_;
+    return unless $id && exists $self->{'_id'}->{$id};
+    return $self->{'_id'}->{$id}->[1];
+}
+
+
+# Returns 1 or 0. When '_haslinkout' exists, the answer is whether it
+# equals 'Y'; otherwise it is whether get_databases() contains 'LinkOut'.
+sub has_linkout {
+    my ($self) = @_;
+    if (exists $self->{'_haslinkout'}) {
+        return $self->{'_haslinkout'} eq 'Y' ? 1 : 0;
+    }
+    my $linkout_dbs = grep { $_ eq 'LinkOut' } $self->get_databases;
+    return $linkout_dbs ? 1 : 0;
+}
+
+
+# Returns 1 if '_hasneighbor' exists and equals 'Y', otherwise 0.
+sub has_neighbor {
+    my ($self) = @_;
+    return 0 unless exists $self->{'_hasneighbor'};
+    return $self->{'_hasneighbor'} eq 'Y' ? 1 : 0;
+}
+
+
+# Iterator over the UrlLink objects. On first use (or after the iterator
+# was deleted) a closure over a copy of get_UrlLinks() is stored in
+# '_urllinks_it'; each call shifts the next object off that copy.
+sub next_UrlLink {
+    my ($self) = @_;
+    if (!$self->{'_urllinks_it'}) {
+        my @pending = $self->get_UrlLinks;
+        $self->{'_urllinks_it'} = sub { return shift @pending };
+    }
+    return $self->{'_urllinks_it'}->();
+}
+
+
+# Returns all UrlLink objects stored in '_urllinks', or nothing if none
+# have been added.
+sub get_UrlLinks {
+    my ($self) = @_;
+    return unless ref $self->{'_urllinks'};
+    return @{ $self->{'_urllinks'} };
+}
+
+
+# Iterator over the LinkInfo objects. On first use (or after the iterator
+# was deleted) a closure over a copy of get_LinkInfo() is stored in
+# '_linkinfo_it'; each call shifts the next object off that copy.
+sub next_LinkInfo {
+    my ($self) = @_;
+    if (!$self->{'_linkinfo_it'}) {
+        my @pending = $self->get_LinkInfo;
+        $self->{'_linkinfo_it'} = sub { return shift @pending };
+    }
+    return $self->{'_linkinfo_it'}->();
+}
+
+
+# Returns all LinkInfo objects stored in '_linkinfo', or an empty list if
+# none have been added.
+sub get_LinkInfo {
+    my ($self) = @_;
+    return () unless ref $self->{'_linkinfo'};
+    return @{ $self->{'_linkinfo'} };
+}
+
+
+{
+    # Accepted rewind() arguments => name of the iterator they reset.
+    my %VALID_DATA = ('linkinfo'  => 'linkinfo',
+                      'linkinfos' => 'linkinfo',
+                      'urllinks'  => 'urllinks');
+
+    # Resets the iterators used by next_LinkInfo / next_UrlLink by deleting
+    # the stored closures. With no argument (or 'all') every iterator is
+    # reset; unrecognised arguments are ignored. Returns nothing.
+    sub rewind {
+        my ($self, $arg) = @_;
+        $arg ||= 'all';
+        if (exists $VALID_DATA{$arg}) {
+            # Use the mapped name: the original deleted '_'.$arg.'_it', so
+            # the documented alias 'linkinfos' never reset '_linkinfo_it'.
+            delete $self->{'_'.$VALID_DATA{$arg}.'_it'};
+        } elsif ($arg eq 'all') {
+            delete $self->{'_'.$_.'_it'} for values %VALID_DATA;
+        }
+        return;
+    }
+}
+
+# private methods and handlers
+
+{
+    # Dispatch table: key of the parsed linkset data => handler method.
+    my %DATA_HANDLER = (
+        'IdList'   => \&_add_submitted_ids,
+        'Id'       => \&_add_retrieved_ids,
+        'LinkInfo' => \&_add_linkinfo,
+        'Link'     => \&_add_retrieved_ids,
+        'ObjUrl'   => \&_add_objurls,
+    );
+
+    # Loads one parsed linkset hash ref into the object. Keys with a handler
+    # are processed in a fixed order and then deleted from $data (so $data
+    # is modified). For the 'idcheck' datatype a 'content' value becomes the
+    # single retrieved ID. Every remaining key is stored under '_' plus its
+    # lower-cased name. Returns nothing.
+    sub _add_data {
+        my ($self, $data) = @_;
+        for my $key (qw(IdList Link Id ObjUrl LinkInfo)) {
+            next if !exists $data->{$key};
+            my $handler = $DATA_HANDLER{$key};
+            $self->$handler($data);
+            delete $data->{$key};
+        }
+        if ($self->datatype eq 'idcheck' && exists $data->{content}) {
+            %{ $self->{'_id'} } = ($data->{content} => [1]);
+            delete $data->{content};
+        }
+        # map the rest (a plain loop instead of map in void context)
+        for my $key (keys %$data) {
+            $self->{'_' . lc $key} = $data->{$key};
+        }
+        return;
+    }
+
+}
+
+# Copies the ID list found at $data->{IdList}->{Id}, if present, into
+# '_submitted_ids'.
+sub _add_submitted_ids {
+    my ($self, $data) = @_;
+    return unless exists $data->{IdList}->{Id};
+    my @submitted = @{ $data->{IdList}->{Id} };
+    @{ $self->{'_submitted_ids'} } = @submitted;
+}
+
+# Records retrieved IDs in '_id'. Each entry is an array ref:
+#   ID => [ POSITION ]  or  ID => [ POSITION, SCORE ]
+# When $data has a Link list, every link's first Id is stored with its
+# position in that list; a Score, if present, is stored too and
+# '_has_scores' is incremented. Otherwise, when $data has an Id list (URL
+# data), '_id' is replaced by a single entry for the first Id.
+sub _add_retrieved_ids {
+    my ($self, $data) = @_;
+    if (exists $data->{Link}) {
+        my $position = 0;
+        for my $link (@{ $data->{Link} }) {
+            my @entry = ($position++);
+            if (exists $link->{Score}) {
+                $self->{'_has_scores'}++;
+                push @entry, $link->{Score};
+            }
+            $self->{'_id'}->{ $link->{Id}->[0] } = \@entry;
+        }
+        return;
+    }
+    if (exists $data->{Id}) { # urls
+        %{ $self->{'_id'} } = ($data->{Id}->[0] => [1]);
+    }
+}
+
+# Builds one Bio::Tools::EUtilities::Link::UrlLink per entry of
+# $data->{ObjUrl} and appends it to '_urllinks'. The linkset's DbFrom, if
+# present, is copied into each entry as 'dbfrom' first.
+sub _add_objurls {
+    my ($self, $data) = @_;
+    for my $urldata (@{ $data->{ObjUrl} }) {
+        if (exists $data->{DbFrom}) {
+            $urldata->{dbfrom} = $data->{DbFrom};
+        }
+        my $urllink = Bio::Tools::EUtilities::Link::UrlLink->new(
+            -eutil    => 'elink',
+            -datatype => 'urldata',
+            -verbose  => $self->verbose,
+        );
+        $urllink->_add_data($urldata);
+        push @{ $self->{'_urllinks'} }, $urllink;
+    }
+}
+
+# Builds one Bio::Tools::EUtilities::Info::LinkInfo per entry of
+# $data->{LinkInfo} and appends it to '_linkinfo'. The linkset's DbFrom, if
+# present, is copied into each entry as 'dbfrom' first.
+sub _add_linkinfo {
+    my ($self, $data) = @_;
+    for my $linkinfo (@{ $data->{LinkInfo} }) {
+        if (exists $data->{DbFrom}) {
+            $linkinfo->{dbfrom} = $data->{DbFrom};
+        }
+        my $info = Bio::Tools::EUtilities::Info::LinkInfo->new(
+            -eutil    => 'elink',
+            -datatype => 'linkinfo',
+            -verbose  => $self->verbose,
+        );
+        $info->_add_data($linkinfo);
+        push @{ $self->{'_linkinfo'} }, $info;
+    }
+}
+
+
+# Renders the linkset as text and returns the string. The optional argument
+# is the indentation width (default 0); labels are padded to 20 columns
+# minus that width. Output is: one wrapped "Label :values" line per field
+# that yields at least one defined value, then every LinkInfo and UrlLink
+# (indented by 4), then an ID/score table when scores are present.
+# Note that the LinkInfo and UrlLink iterators are consumed in the process.
+sub to_string {
+    my $self = shift;
+    my $level = shift || 0;
+    my $pad = 20 - $level;
+    # [accessor method, printed label], in output order
+    my @fields = (
+        [ 'get_databases'     => 'DB' ],
+        [ 'get_ids'           => 'ID' ],
+        [ 'get_link_names'    => 'Link Names' ],
+        [ 'get_submitted_ids' => 'Submitted IDs' ],
+        [ 'has_scores'        => 'Scores?' ],
+        [ 'has_linkout'       => 'LinkOut?' ],
+        [ 'has_neighbor'      => 'DB Neighbors?' ],
+        [ 'get_webenv'        => 'WebEnv' ],
+        [ 'get_query_key'     => 'Key' ],
+    );
+    my $string = '';
+    for my $field (@fields) {
+        my ($m, $nm) = @$field;
+        # list assignment copes with accessors returning lists or scalars
+        my @content = grep {defined $_} $self->$m();
+        next unless @content;
+        # '@content' had been mangled to ' at content' by e-mail address
+        # obfuscation
+        $string .= $self->_text_wrap(
+            sprintf("%-*s%-*s:",$level, '',$pad, $nm,),
+            ' ' x ($pad).':',
+            join(', ',@content))."\n";
+    }
+    while (my $li = $self->next_LinkInfo) {
+        $string .= $li->to_string(4);
+    }
+    while (my $ui = $self->next_UrlLink) {
+        $string .= $ui->to_string(4);
+    }
+    if ($self->has_scores) {
+        $string .= "Scores:\n";
+        my %scores = $self->get_scores;
+        $string .= sprintf("%-*s%-*s%s\n",
+            $level + 4, '',
+            $pad - 4, 'ID', 'Score'
+        );
+        for my $id ($self->get_ids) {
+            $string .= sprintf("%-*s%-*s%s\n",
+                $level + 4, '',
+                $pad - 4, $id, $scores{$id}
+            );
+        }
+    }
+    $string .= "\n";
+    return $string;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Link::LinkSet - Class for EUtils LinkSets.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ # ...
+
+=head1 DESCRIPTION
+
+ # ...
+
+=head2 get_ids
+
+ Title : get_ids
+ Usage : my @ids = $linkset->get_ids
+ Function : returns list of retrieved IDs
+ Returns : array of IDs
+ Args : none
+ Notes : Cmd Description
+ acheck same as get_submitted_ids
+ lcheck same as get_submitted_ids
+ ncheck same as get_submitted_ids
+ prlinks same as get_submitted_ids
+ llinks same as get_submitted_ids
+ llinkslib same as get_submitted_ids
+ neighbor linked IDs for database in get_database
+ neighbor_history linked IDs for database in get_database
+
+=head2 get_database
+
+ Title : get_database
+ Usage : my $db = $info->get_database;
+ Function : returns single database name (eutil-compatible). This is the
+ queried database. For elinks (which have 'db' and 'dbfrom')
+ this is equivalent to db/dbto (use get_dbfrom() for the latter).
+ Note that this only returns the first db; in some cases this may
+ not be what you want (when multiple dbs are queried, for instance)
+ Returns : string
+ Args : none
+ Notes : with all elink cmd arguments
+
+=head2 get_db (alias for get_database)
+
+=head2 get_dbto (alias for get_database)
+
+=head2 get_databases
+
+ Title : get_databases
+ Usage : my $string = $linkset->get_databases;
+ Function : retrieve databases referred to for this linkset
+ these may be present as a single database or embedded in LinkInfo objects
+ Returns : array of strings
+ Args : none
+
+=head2 get_dbs (alias for get_databases)
+
+=head2 get_dbfrom
+
+ Title : get_dbfrom
+ Usage : my $string = $linkset->get_dbfrom;
+ Function : retrieve originating database for this linkset
+ Returns : string
+ Args : none
+
+=head2 get_link_names
+
+ Title : get_link_names
+ Usage : my $string = $linkset->get_link_names;
+ Function : retrieve eutil-compatible link names
+ Returns : array of strings
+ Args : none
+ Notes : Each LinkSet can hold multiple LinkInfo objects (each containing
+ a link name). Also, some LinkSets define a single link name. This
+ returns an array with all unique link names gathered from both sources,
+ if present and defined
+
+=head2 get_link_name
+
+ Title : get_link_name
+ Usage : my $string = $linkset->get_link_name;
+ Function : retrieve eutil-compatible link name
+ Returns : single link name
+ Args : none
+
+=head2 get_submitted_ids
+
+ Title : get_submitted_ids
+ Usage : my @ids = $linkset->get_submitted_ids;
+ Function : retrieve original ID list
+ Returns : list of IDs
+ Args : none
+
+=head2 has_scores
+
+ Title : has_scores
+ Usage : if ($linkset->has_scores) {...}
+ Function : returns TRUE if score data is present
+ Returns : Boolean
+ Args : none
+
+=head2 get_scores
+
+ Title : get_scores
+ Usage : %scores = $linkset->get_scores;
+ Function : returns flattened list containing ID => score pairs
+ Returns : list of ID => score pairs (assignable to a hash); nothing if no scores are present
+ Args : none
+
+=head2 get_score_by_id
+
+ Title : get_score_by_id
+ Usage : $score = $linkset->get_score_by_id($id);
+ Function : returns the score for a particular primary ID
+ Returns : integer
+ Args : [REQUIRED] Primary ID for the score lookup
+
+=head2 has_linkout
+
+ Title : has_linkout
+ Usage : if ($linkset->has_linkout) {...}
+ Function : returns TRUE if the single ID present in this linkset has a linkout
+ Returns : boolean
+ Args : none
+ Notes : this checks cmd=lcheck (boolean for a linkout) and also backchecks
+ cmd=acheck for databases with name 'LinkOut'
+
+=head2 has_neighbor
+
+ Title : has_neighbor
+ Usage : if ($linkset->has_neighbor) {...}
+ Function : returns TRUE if the single ID present in this linkset has a neighbor
+ in the same database
+ Returns : boolean
+ Args : none
+ Notes : this checks cmd=ncheck (boolean for a neighbor in same database); no
+ other checks performed at this time
+
+=head2 next_UrlLink
+
+ Title : next_UrlLink
+ Usage : while (my $url = $linkset->next_UrlLink) {...}
+ Function : iterate through UrlLink objects
+ Returns : Bio::Tools::EUtilities::Link::UrlLink
+ Args :
+
+=head2 get_UrlLinks
+
+ Title : get_UrlLinks
+ Usage : my @urls = $linkset->get_UrlLinks
+ Function : returns all UrlLink objects
+ Returns : list of Bio::Tools::EUtilities::Link::UrlLink
+ Args :
+
+=head2 next_LinkInfo
+
+ Title : next_LinkInfo
+ Usage : while (my $info = $linkset->next_LinkInfo) {...}
+ Function : iterate through LinkInfo objects
+ Returns : Bio::Tools::EUtilities::Info::LinkInfo
+ Args :
+
+=head2 get_LinkInfo
+
+ Title : get_LinkInfo
+ Usage : my @links = $linkset->get_LinkInfo
+ Function : returns all LinkInfo objects
+ Returns : list of Bio::Tools::EUtilities::Info::LinkInfo
+ Args :
+
+=head2 rewind
+
+ Title : rewind
+ Usage : $info->rewind() # rewinds all (default)
+ $info->rewind('linkinfo') # rewinds only LinkInfo objects
+ Function : 'rewinds' (resets) specified iterators (all if no arg)
+ Returns : none
+ Args : [OPTIONAL] String:
+ 'all' - all iterators (default)
+ 'linkinfo' or 'linkinfos' - LinkInfo objects only
+ 'urllinks' - UrlLink objects only
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) integer indentation level for text formatting (default 0)
+ Note : Used generally for debugging and for various print methods
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Link/UrlLink.pm b/lib/Bio/Tools/EUtilities/Link/UrlLink.pm
new file mode 100644
index 0000000..4af4fe1
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Link/UrlLink.pm
@@ -0,0 +1,256 @@
+package Bio::Tools::EUtilities::Link::UrlLink;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Link::UrlLink::VERSION = '1.75';
+use utf8;
+use base qw(Bio::Root::Root Bio::Tools::EUtilities::EUtilDataI);
+
+# ABSTRACT: Class for EUtils UrlLinks.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+# Returns the value stored under '_dbfrom'.
+sub get_dbfrom {
+    my ($self) = @_;
+    return $self->{'_dbfrom'};
+}
+
+
+# Returns the value stored under '_attribute'.
+sub get_attribute {
+    my ($self) = @_;
+    return $self->{'_attribute'};
+}
+
+
+# Returns the value stored under '_iconurl'.
+sub get_icon_url {
+    my ($self) = @_;
+    return $self->{'_iconurl'};
+}
+
+
+# Returns the value stored under '_subjecttype'.
+sub get_subject_type {
+    my ($self) = @_;
+    return $self->{'_subjecttype'};
+}
+
+
+# Returns the stored URL. A URL that starts with '/' (an Entrez LinkOut URL
+# given without a host) is rewritten in place to an absolute URL on the
+# NCBI server before being returned.
+sub get_url {
+    my ($self) = @_;
+    my $url = $self->{'_url'};
+    if ($url && $url =~ m{^/}) {
+        $self->{'_url'} = 'https://www.ncbi.nih.gov'.$url;
+    }
+    return $self->{'_url'};
+}
+
+
+# Returns the value stored under '_linkname'.
+sub get_link_name {
+    my ($self) = @_;
+    return $self->{'_linkname'};
+}
+
+
+# Returns the value stored under '_provider_name'.
+sub get_provider_name {
+    my ($self) = @_;
+    return $self->{'_provider_name'};
+}
+
+
+# Returns the value stored under '_provider_nameabbr'.
+sub get_provider_abbr {
+    my ($self) = @_;
+    return $self->{'_provider_nameabbr'};
+}
+
+
+# Returns the first element of the array stored under '_provider_id'.
+sub get_provider_id {
+    my ($self) = @_;
+    return $self->{'_provider_id'}[0];
+}
+
+
+# Returns the value stored under '_provider_iconurl'.
+sub get_provider_icon_url {
+    my ($self) = @_;
+    return $self->{'_provider_iconurl'};
+}
+
+
+# Returns the value stored under '_provider_url'.
+sub get_provider_url {
+    my ($self) = @_;
+    return $self->{'_provider_url'};
+}
+
+# private method
+
+# Loads one parsed ObjUrl hash ref into the object. Entries of the nested
+# Provider hash are stored under '_provider_' plus the lower-cased key, and
+# Provider is removed from $data (so $data is modified). Every remaining
+# key with a true value is stored under '_' plus its lower-cased name;
+# false values are skipped. Returns nothing.
+sub _add_data {
+    my ($self, $data) = @_;
+    if (exists $data->{Provider}) {
+        my $provider = delete $data->{Provider};
+        for my $key (keys %$provider) {
+            $self->{'_provider_' . lc $key} = $provider->{$key};
+        }
+    }
+    # plain loops instead of map in void context
+    for my $key (keys %$data) {
+        next unless $data->{$key};
+        $self->{'_' . lc $key} = $data->{$key};
+    }
+    return;
+}
+
+
+# Renders the object as text, one wrapped "Label :value" line per populated
+# field. The optional argument is the indentation width (default 0);
+# labels are padded to 20 columns minus that width. Fields with a false
+# value are omitted. Returns the accumulated string.
+sub to_string {
+    my ($self, $level) = @_;
+    $level ||= 0;
+    my $pad = 20 - $level;
+    # [accessor method, printed label], in output order
+    my @fields = (
+        [ 'get_link_name'         => 'Link Name' ],
+        [ 'get_subject_type'      => 'Subject Type' ],
+        [ 'get_dbfrom'            => 'DB From' ],
+        [ 'get_attribute'         => 'Attribute' ],
+        [ 'get_icon_url'          => 'IconURL' ],
+        [ 'get_url'               => 'URL' ],
+        [ 'get_provider_name'     => 'Provider' ],
+        [ 'get_provider_abbr'     => 'ProvAbbr' ],
+        [ 'get_provider_id'       => 'ProvID' ],
+        [ 'get_provider_url'      => 'ProvURL' ],
+        [ 'get_provider_icon_url' => 'ProvIcon' ],
+    );
+    my $string = '';
+    for my $field (@fields) {
+        my ($method, $label) = @$field;
+        my $content = $self->$method();
+        next unless $content;
+        my $prefix = sprintf("%-*s%-*s:", $level, '', $pad, $label);
+        $string .= $self->_text_wrap($prefix, ' ' x ($pad).':', $content)."\n";
+    }
+    return $string;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Link::UrlLink - Class for EUtils UrlLinks.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ # ...
+
+=head1 DESCRIPTION
+
+ # ...
+
+=head2 get_dbfrom
+
+ Title : get_dbfrom
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_attribute
+
+ Title : get_attribute
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_icon_url
+
+ Title : get_icon_url
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_subject_type
+
+ Title : get_subject_type
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_url
+
+ Title : get_url
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_link_name
+
+ Title : get_link_name
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_provider_name
+
+ Title : get_provider_name
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_provider_abbr
+
+ Title : get_provider_abbr
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_provider_id
+
+ Title : get_provider_id
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_provider_icon_url
+
+ Title : get_provider_icon_url
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_provider_url
+
+ Title : get_provider_url
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) integer indentation level for text formatting (default 0)
+ Note : Used generally for debugging and for various print methods
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Query.pm b/lib/Bio/Tools/EUtilities/Query.pm
new file mode 100644
index 0000000..a6ffd9f
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Query.pm
@@ -0,0 +1,192 @@
+package Bio::Tools::EUtilities::Query;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Query::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use Bio::Tools::EUtilities::Query::GlobalQuery;
+use Bio::Tools::EUtilities::History;
+use base qw(Bio::Tools::EUtilities);
+
+# ABSTRACT: Parse and collect esearch, epost, espell, egquery information.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+# private EUtilDataI method
+
+{
+# Datatype label recorded on the parser for each supported eutil.
+my %DATATYPE_FOR = (
+    'espell'  => 'spelling',
+    'esearch' => 'singledbquery',
+    'egquery' => 'multidbquery',
+    'epost'   => 'history'
+);
+
+# Load a parsed eutil response (a hash ref) into this object.
+#
+#  * calls throw("Bad <eutil> data") unless $qdata is a true value whose
+#    ref() matches /HASH/i
+#  * when a WebEnv key exists, a History object is built from the same data
+#    and pushed onto _histories
+#  * calls throw("Unrecognized eutil <eutil>") when the eutil has no
+#    datatype mapping; otherwise the mapped label is passed to datatype()
+#  * eGQueryResult/ResultItem entries become GlobalQuery objects pushed onto
+#    _globalqueries; IdList/Id is stored as _id and
+#    TranslationSet/Translation as _translation, and those two keys are
+#    deleted from $qdata (the caller's hash is modified)
+#  * finally each key is stored on $self as '_' . lc(key), except keys
+#    whose value is an empty hash ref
+sub _add_data {
+    my ($self, $qdata) = @_;
+    my $eutil = $self->eutil;
+    $self->throw("Bad $eutil data")
+        unless $qdata && ref($qdata) =~ /HASH/i;
+
+    if (exists $qdata->{WebEnv}) {
+        my $history = Bio::Tools::EUtilities::History->new(
+            -eutil   => $eutil,
+            -verbose => $self->verbose,
+        );
+        $history->_add_data($qdata);
+        push @{ $self->{'_histories'} }, $history;
+    }
+
+    my $type = exists $DATATYPE_FOR{$eutil}
+        ? $DATATYPE_FOR{$eutil}
+        : $self->throw("Unrecognized eutil $eutil");
+    $self->datatype($type);    # reset type based on what's present
+
+    for my $field (sort keys %$qdata) {
+        if ($field eq 'eGQueryResult') {
+            if (exists $qdata->{$field}->{ResultItem}) {
+                for my $result (@{ $qdata->{eGQueryResult}->{ResultItem} }) {
+                    # every per-database result carries the overall search term
+                    $self->{'_term'} = $result->{Term} = $qdata->{Term};
+                    my $global = Bio::Tools::EUtilities::Query::GlobalQuery->new(
+                        -eutil    => 'egquery',
+                        -datatype => 'globalquery',
+                        -verbose  => $self->verbose,
+                    );
+                    $global->_add_data($result);
+                    push @{ $self->{'_globalqueries'} }, $global;
+                }
+            }
+        }
+        elsif ($field eq 'IdList') {
+            if (exists $qdata->{IdList}->{Id}) {
+                $self->{'_id'} = $qdata->{IdList}->{Id};
+                delete $qdata->{IdList};
+            }
+        }
+        elsif ($field eq 'TranslationSet') {
+            if (exists $qdata->{TranslationSet}->{Translation}) {
+                $self->{'_translation'} = $qdata->{TranslationSet}->{Translation};
+                delete $qdata->{TranslationSet};
+            }
+        }
+        # skip empty hash refs; everything else is kept under a lowercased key
+        next if ref($qdata->{$field}) eq 'HASH' && !keys %{ $qdata->{$field} };
+        $self->{ '_' . lc $field } = $qdata->{$field};
+    }
+}
+
+}
+
+
+# Build a printable report for this query result.
+#
+# The report is the parent class's to_string() output, followed by one
+# "label:value" line per field (DB, Query and IDs always; extra fields for
+# esearch and espell results), followed by the to_string() output of each
+# object returned by next_History and next_GlobalQuery.
+#
+# Args    : none
+# Returns : string
+# Note    : next_History and next_GlobalQuery are called until they return
+#           a false value.
+sub to_string {
+    my $self = shift;
+    # label => [display rank, value]
+    my %data = (
+        'DB'    => [1, join(', ',$self->get_databases) || ''],
+        'Query' => [2, $self->get_term || ''],
+        'IDs'   => [4, join(', ',$self->get_ids) || ''],
+    );
+    my $string = $self->SUPER::to_string;
+    if ($self->eutil eq 'esearch') {
+        $data{'Count'}            = [3, $self->get_count ];
+        $data{'Translation From'} = [5, $self->get_translation_from || ''];
+        $data{'Translation To'}   = [6, $self->get_translation_to || ''];
+        $data{'RetStart'}         = [7, $self->get_retstart];
+        $data{'RetMax'}           = [8, $self->get_retmax];
+        $data{'Translation'}      = [9, $self->get_query_translation || ''];
+    }
+    if ($self->eutil eq 'espell') {
+        $data{'Corrected'} = [3, $self->get_corrected_query || ''];
+        $data{'Replaced'}  = [4, join(',',$self->get_replaced_terms) || ''];
+    }
+    # Order by rank and break ties by label: for espell both 'IDs' and
+    # 'Replaced' have rank 4, and without the tie-break their relative order
+    # would follow the (unordered) hash key order.
+    for my $k (sort { $data{$a}->[0] <=> $data{$b}->[0] || $a cmp $b } keys %data) {
+        $string .= sprintf("%-20s:%s\n",$k, $self->_text_wrap('',' 'x 20 .':', $data{$k}->[1]));
+    }
+    while (my $h = $self->next_History) {
+        $string .= $h->to_string;
+    }
+    while (my $gq = $self->next_GlobalQuery) {
+        $string .= $gq->to_string;
+    }
+    return $string;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Query - Parse and collect esearch, epost, espell, egquery information.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ ### should not create instance directly; Bio::Tools::EUtilities does this ###
+
+ # can also use '-response' (for HTTP::Response objects) or '-fh' (for
+ # filehandles)
+
+ my $info = Bio::Tools::EUtilities->new(-eutil => 'esearch',
+ -file => 'esearch.xml');
+
+ # esearch
+
+ # esearch with history
+
+ # egquery
+
+ # espell (just for completeness, really)
+
+=head1 DESCRIPTION
+
+Pluggable module for handling query-related data returned from eutils.
+
+=head1 Bio::Tools::EUtilities::Query methods
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) simple data for text formatting
+ Note : Used generally for debugging and for the print_* methods
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Query/GlobalQuery.pm b/lib/Bio/Tools/EUtilities/Query/GlobalQuery.pm
new file mode 100644
index 0000000..48db62a
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Query/GlobalQuery.pm
@@ -0,0 +1,193 @@
+package Bio::Tools::EUtilities::Query::GlobalQuery;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Query::GlobalQuery::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Root::Root Bio::Tools::EUtilities::EUtilDataI);
+
+# ABSTRACT: Container class for egquery data.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+# Constructor: delegates to the parent constructor with the caller's
+# arguments, then sets eutil to 'egquery' and datatype to 'globalquery'.
+# Returns the new object.
+sub new {
+    my $class = shift;
+    my $self  = $class->SUPER::new(@_);
+    $self->eutil('egquery');
+    $self->datatype('globalquery');
+    return $self;
+}
+
+
+# Accessor: returns the value stored under '_term'.
+sub get_term {
+    my $self = shift;
+    return $self->{'_term'};
+}
+
+
+# Accessor: returns the value stored under '_dbname'.
+sub get_database {
+    my $self = shift;
+    return $self->{'_dbname'};
+}
+
+
+# Accessor: returns the value stored under '_count'.
+sub get_count {
+    my $self = shift;
+    return $self->{'_count'};
+}
+
+
+# Accessor: returns the value stored under '_status'.
+sub get_status {
+    my $self = shift;
+    return $self->{'_status'};
+}
+
+
+# Accessor: returns the value stored under '_menuname'.
+sub get_menu_name {
+    my ($self) = @_;
+    return $self->{'_menuname'};
+}
+
+# private method
+
+# Copy every key/value pair of the hash ref $data onto this object, storing
+# each value under '_' followed by the lowercased key.
+#
+# Args    : hash ref
+# Returns : nothing
+sub _add_data {
+    my ($self, $data) = @_;
+    # plain loop instead of map in void context: only the side effect matters
+    for my $key (keys %$data) {
+        $self->{ '_' . lc $key } = $data->{$key};
+    }
+    return;
+}
+
+
+# Format this result as a single newline-terminated line built from
+# get_database, get_count and get_status.
+# Returns : string
+sub to_string {
+    my ($self) = @_;
+    return sprintf(
+        "%-20s Total:%-10d Status:%s\n",
+        $self->get_database,
+        $self->get_count,
+        $self->get_status
+    );
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Query::GlobalQuery - Container class for egquery data.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ #### should not create instance directly; Bio::Tools::EUtilities does this ####
+
+ my $parser = Bio::Tools::EUtilities->new(-eutil => 'egquery',
+ -term => 'BRCA1');
+
+ # $gquery is a Bio::Tools::EUtilities::Query::GlobalQuery
+ while (my $gquery = $parser->next_GlobalQuery) {
+ print $gquery->to_string."\n"; # stringify
+ print "DB:".$gquery->get_database."\t".$gquery->get_count;
+ }
+
+=head1 DESCRIPTION
+
+This is a simple container class for egquery data. Currently this just contains
+various accessors for the data, such as get_database(), get_count(), etc. for
+each item in a global query.
+
+=head2 get_term
+
+ Title : get_term
+ Usage : $st = $qd->get_term;
+ Function: retrieve the term for the global search
+ Returns : string
+ Args : none
+
+=head2 get_database
+
+ Title : get_database
+ Usage : $ct = $qd->get_database;
+ Function: retrieve the database
+ Returns : string
+ Args : none
+
+=head2 get_count
+
+ Title : get_count
+ Usage : $ct = $qd->get_count;
+ Function: retrieve the count for the database
+ Returns : string
+ Args : none
+
+=head2 get_status
+
+ Title : get_status
+ Usage : $st = $qd->get_status;
+ Function: retrieve the query status for the database returned by get_database()
+ Returns : string
+ Args : none
+
+=head2 get_menu_name
+
+ Title : get_menu_name
+ Usage : $ct = $qd->get_menu_name;
+ Function: retrieve the full name for the database returned by get_database()
+ Returns : string
+ Args : None
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) simple data for text formatting
+ Note : Used generally for debugging and for the print_GlobalQuery method
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Summary.pm b/lib/Bio/Tools/EUtilities/Summary.pm
new file mode 100644
index 0000000..b9f103c
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Summary.pm
@@ -0,0 +1,140 @@
+package Bio::Tools::EUtilities::Summary;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Summary::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use Bio::Tools::EUtilities::Summary::DocSum;
+use base qw(Bio::Tools::EUtilities Bio::Tools::EUtilities::EUtilDataI);
+
+# ABSTRACT: Class for handling data output (XML) from esummary.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+# private EUtilDataI method
+
+# Turn each entry of $data->{DocSum} into a DocSum object and push it onto
+# _docsums.
+#
+# Args    : hash ref; DocSum is dereferenced as an array ref
+# Returns : nothing
+# Note    : when there is no DocSum key, warn('No returned docsums.') is
+#           called and nothing is stored.
+sub _add_data {
+    my ($self, $data) = @_;
+    if (!exists $data->{DocSum}) {
+        $self->warn('No returned docsums.');
+        return;
+    }
+
+    for my $docsum (@{ $data->{DocSum} }) {
+        my $ds = Bio::Tools::EUtilities::Summary::DocSum->new(
+            -datatype => 'docsum',
+            -verbose  => $self->verbose,
+        );
+        $ds->_add_data($docsum);
+        push @{ $self->{'_docsums'} }, $ds;
+    }
+    return;
+}
+
+
+# Build a printable report: the parent class's to_string() output, a 'DB'
+# line listing get_databases, then the to_string() output of each object
+# returned by next_DocSum (one blank-line-separated entry per docsum).
+# Returns : string
+sub to_string {
+    my ($self) = @_;
+    my $databases = join(', ',$self->get_databases) || '';
+    my $report    = $self->SUPER::to_string."\n";
+    my $hanging   = (' ' x 20) . ':';
+    $report .= sprintf("%-20s:%s\n\n", 'DB',
+                       $self->_text_wrap('', $hanging, $databases));
+    while (my $docsum = $self->next_DocSum) {
+        $report .= $docsum->to_string."\n";
+    }
+    return $report;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Summary - Class for handling data output (XML) from esummary.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ #### should not create instance directly; Bio::Tools::EUtilities does this ####
+
+ my $esum = Bio::Tools::EUtilities->new(-eutil => 'esummary',
+ -file => 'summary.xml');
+ # can also use '-response' (for HTTP::Response objects) or '-fh' (for filehandles)
+
+ while (my $docsum = $esum->next_DocSum) {
+ my $id = $docsum->get_ids; # EUtilDataI compliant method, returns docsum ID
+ my @names = $docsum->get_item_names;
+ }
+
+=head1 DESCRIPTION
+
+This class handles data output (XML) from esummary.
+
+esummary retrieves information in the form of document summaries (docsums) when
+passed a list of primary IDs or if using a previous search history.
+
+This module breaks down the returned data from esummary into individual document
+summaries per ID (using a DocSum object). As the data in a docsum can be nested,
+subclasses of DocSums (Item, ListItem, Structure) are also present.
+
+Further documentation for Link and Field subclass methods is included below.
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) simple data for text formatting
+ Note : Used generally for debugging and for the print_* methods
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Summary/DocSum.pm b/lib/Bio/Tools/EUtilities/Summary/DocSum.pm
new file mode 100644
index 0000000..b28e06e
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Summary/DocSum.pm
@@ -0,0 +1,280 @@
+package Bio::Tools::EUtilities::Summary::DocSum;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Summary::DocSum::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Root::Root Bio::Tools::EUtilities::Summary::ItemContainerI);
+use Bio::Tools::EUtilities::Summary::Item;
+
+# ABSTRACT: Data object for document summary data from esummary.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+# Constructor.
+#
+# Args    : named arguments forwarded to the parent constructor; the
+#           DATATYPE argument, when true, overrides the default datatype
+#           'docsum'
+# Returns : the new object, with eutil set to 'esummary'
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    # '@args' had been mangled to ' at args' by e-mail address obfuscation
+    my ($type) = $self->_rearrange(['DATATYPE'], @args);
+    $type ||= 'docsum';
+    $self->eutil('esummary');
+    $self->datatype($type);
+    return $self;
+}
+
+
+# Returns the stored '_id': as a one-element list in list context, or as a
+# one-element array ref otherwise.
+sub get_ids {
+    my ($self) = @_;
+    return $self->{'_id'} if wantarray;
+    return [ $self->{'_id'} ];
+}
+
+
+# Accessor: returns the value stored under '_id'.
+sub get_id {
+    my ($self) = @_;
+    return $self->{'_id'};
+}
+
+
+
+
+
+
+
+
+
+
+# Reset the Item iterator of this object by deleting '_items_it'.
+#
+# Args : [optional] 'all' - first call rewind('all') on every object
+#        returned by get_Items
+sub rewind {
+    my ($self, $request) = @_;
+    if ($request && $request eq 'all') {
+        for my $item ($self->get_Items) {
+            $item->rewind('all');
+        }
+    }
+    return delete $self->{'_items_it'};
+}
+
+# private EUtilDataI method
+
+# Load one parsed docsum (hash ref) into this object.
+#
+# Each entry of $data->{Item} (dereferenced as an array ref) is wrapped in
+# an Item object and pushed onto _items; when $data has an Id key, that id
+# is copied into every entry before wrapping and stored on $self as _id.
+sub _add_data {
+    my ($self, $data) = @_;
+    my $entries = $data->{Item};
+    if ($entries) {
+        $self->{'_id'} = $data->{Id} if exists $data->{Id};
+        foreach my $entry (@{$entries}) {
+            # propagate the docsum id down to the item data
+            $entry->{Id} = $data->{Id} if exists $data->{Id};
+            my $item = Bio::Tools::EUtilities::Summary::Item->new(
+                -datatype => 'item',
+                -verbose  => $self->verbose,
+            );
+            $item->_add_data($entry);
+            push @{ $self->{'_items'} }, $item;
+        }
+    }
+    $self->{'_id'} = $data->{Id} if exists $data->{Id};
+}
+
+
+# Build a printable report: a 'UID' line carrying get_id, followed by the
+# to_string() output of each object returned by next_Item.
+# Returns : string
+sub to_string {
+    my ($self) = @_;
+    my $report = sprintf("%-20s%s\n", 'UID', ':'.$self->get_id);
+    while (my $entry = $self->next_Item) {
+        $report .= $entry->to_string;
+    }
+    return $report;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Summary::DocSum - Data object for document summary data from esummary.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ # Implement ItemContainerI
+
+ # $foo is any ItemContainerI (current implementations are DocSum and Item itself)
+
+ while (my $item = $foo->next_Item) { # iterate through contained Items
+ # do stuff here
+ }
+
+ @items = $foo->get_Items; # all Items in the container (hierarchy intact)
+ @items = $foo->get_all_Items; # all Items in the container (flattened)
+ @items = $foo->get_Items_by_name('bar'); # Specifically named Items
+ ($content) = $foo->get_contents_by_name('bar'); # content from specific Items
+ ($type) = $foo->get_type_by_name('bar'); # data type from specific Items
+
+=head1 DESCRIPTION
+
+This is the basic class for Document Summary data from NCBI eUtils, returned
+from esummary. This implements the simple ItemContainerI interface.
+
+=head2 new
+
+ Title : new
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_ids
+
+ Title : get_ids
+ Usage : my ($id) = $item->get_ids;
+ Function : returns array or array ref with id
+ Returns : array or array ref
+ Args : none
+ Note : the behavior of this method remains consistent with other
+ implementations of get_ids(). To retrieve the single DocSum ID
+ use get_id()
+
+=head2 get_id
+
+ Title : get_id
+ Usage : my ($id) = $item->get_id;
+ Function : returns UID of record
+ Returns : integer
+ Args : none
+
+=head1 ItemContainerI methods
+
+=head2 next_Item
+
+ Title : next_Item
+ Usage : while (my $item = $docsum->next_Item) {...}
+ Function : iterates through Items (nested layer of Item)
+ Returns : single Item
+ Args : [optional] single arg (string)
+ 'flatten' - iterates through a flattened list ala
+ get_all_DocSum_Items()
+
+=head2 get_Items
+
+ Title : get_Items
+ Usage : my @items = $docsum->get_Items
+ Function : returns list of, well, Items
+ Returns : array of Items
+ Args : none
+
+=head2 get_all_Items
+
+ Title : get_all_Items
+ Usage : my @items = $docsum->get_all_Items
+ Function : returns flattened list of all Item objects (Items, ListItems,
+ StructureItems)
+ Returns : array of Items
+ Args : none
+ Note : items are added top-down (similar order to using nested calls)
+ in original list order.
+
+ 1 2 7 8
+ Item - Item - Item - Item ...
+ |
+ | 3 6
+ ListItem - ListItem
+ |
+ | 4 5
+ Structure - Structure
+
+=head2 get_all_names
+
+ Title : get_all_names
+ Usage : my @names = get_all_names()
+ Function : Returns an array of names for all Item(s) in DocSum.
+ Returns : array of unique strings
+ Args : none
+
+=head2 get_Items_by_name
+
+ Title : get_Items_by_name
+ Usage : my @items = get_Items_by_name('CreateDate')
+ Function : Returns named Item(s) in DocSum (indicated by passed argument)
+ Returns : array of Item objects
+ Args : string (Item name)
+
+=head2 get_contents_by_name
+
+ Title : get_contents_by_name
+ Usage : my ($data) = get_contents_by_name('CreateDate')
+ Function : Returns content for named Item(s) in DocSum (indicated by
+ passed argument)
+ Returns : array of values (type varies per Item)
+ Args : string (Item name)
+
+=head2 get_type_by_name
+
+ Title : get_type_by_name
+ Usage : my $data = get_type_by_name('CreateDate')
+ Function : Returns data type for named Item in DocSum (indicated by
+ passed argument)
+ Returns : scalar value (string) if present
+ Args : string (Item name)
+
+=head2 rewind
+
+ Title : rewind
+ Usage : $docsum->rewind();
+ Function : rewinds DocSum iterator
+ Returns : none
+ Args : [optional]
+ 'all' - rewind all DocSum object layers
+ (Items, ListItems, StructureItems)
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : string
+ Args : (optional) simple data for text formatting
+ Note : Used generally for debugging and for various print methods
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Summary/Item.pm b/lib/Bio/Tools/EUtilities/Summary/Item.pm
new file mode 100644
index 0000000..d664d3a
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Summary/Item.pm
@@ -0,0 +1,526 @@
+package Bio::Tools::EUtilities::Summary::Item;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Summary::Item::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Root::Root Bio::Tools::EUtilities::EUtilDataI);
+
+# ABSTRACT: Simple layered object for DocSum item data.
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+# Constructor.
+#
+# Args    : named arguments forwarded to the parent constructor; the
+#           DATATYPE argument, when true, overrides the default datatype
+#           'item'
+# Returns : the new object, with eutil set to 'esummary'
+sub new {
+    my ($class, @args) = @_;
+    my $self = $class->SUPER::new(@args);
+    # '@args' had been mangled to ' at args' by e-mail address obfuscation
+    my ($type) = $self->_rearrange(['DATATYPE'], @args);
+    $type ||= 'item';
+    $self->datatype($type);
+    $self->eutil('esummary');
+    # rewind() only treats 'all' specially, so this call just clears this
+    # object's own iterators
+    $self->rewind('recursive');
+    return $self;
+}
+
+
+# Returns the value stored under '_id' (a single value).
+sub get_ids {
+    my ($self) = @_;
+    return ($self->{'_id'});
+}
+
+
+# Accessor: returns the value stored under '_id'.
+sub get_id {
+    my ($self) = @_;
+    return $self->{'_id'};
+}
+
+
+# Iterator over get_ListItems: each call returns the next ListItem, or
+# undef once the list is used up. The iterator is created on first use.
+sub next_ListItem {
+    my ($self) = @_;
+    if (!$self->{'_lists_it'}) {
+        my @pending = $self->get_ListItems;
+        # reset the structure iterator (required!)
+        delete $self->{'_structures_it'} if $self->{'_structures_it'};
+        $self->{'_lists_it'} = sub { return shift @pending };
+    }
+    return $self->{'_lists_it'}->();
+}
+
+
+# Returns the sub-items of this Item when get_type is 'List'; otherwise an
+# empty list.
+sub get_ListItems {
+    my ($self) = @_;
+    my @list_items;
+    @list_items = $self->get_subItems if $self->get_type eq 'List';
+    return @list_items;
+}
+
+
+# Iterator over get_StructureItems: each call returns the next
+# StructureItem, or undef once the list is used up. The iterator is created
+# on first use.
+sub next_StructureItem {
+    my ($self) = @_;
+    if (!$self->{'_structures_it'}) {
+        my @pending = $self->get_StructureItems;
+        $self->{'_structures_it'} = sub { return shift @pending };
+    }
+    return $self->{'_structures_it'}->();
+}
+
+
+# Returns the sub-items of this Item when get_type is 'Structure';
+# otherwise an empty list.
+sub get_StructureItems {
+    my ($self) = @_;
+    my @structure_items;
+    @structure_items = $self->get_subItems if $self->get_type eq 'Structure';
+    return @structure_items;
+}
+
+
+# Iterator over get_subItems regardless of type: each call returns the next
+# sub-item, or undef once the list is used up. The iterator is created on
+# first use.
+sub next_subItem {
+    my ($self) = @_;
+    if (!$self->{'_subitem_it'}) {
+        my @pending = $self->get_subItems;
+        $self->{'_subitem_it'} = sub { return shift @pending };
+    }
+    return $self->{'_subitem_it'}->();
+}
+
+
+# Returns the contents of the '_items' array ref, or an empty list when
+# '_items' is not a reference.
+sub get_subItems {
+    my ($self) = @_;
+    return () unless ref $self->{'_items'};
+    return @{ $self->{'_items'} };
+}
+
+
+# Accessor: returns the value stored under '_itemname'.
+sub get_name {
+    my ($self) = @_;
+    return $self->{'_itemname'};
+}
+
+
+# Accessor: returns the value stored under '_itemtype'.
+sub get_type {
+    my ($self) = @_;
+    return $self->{'_itemtype'};
+}
+
+
+# Accessor: returns the value stored under '_itemcontent'.
+sub get_content {
+    my ($self) = @_;
+    return $self->{'_itemcontent'};
+}
+
+
+# Reset every iterator held by this Item so the next_* methods start over.
+#
+# Clears the iterators behind next_ListItem, next_StructureItem,
+# next_subItem and next_Item. The last two were previously left in place,
+# so they could never be restarted once used.
+#
+# Args    : [optional] 'all' - also call rewind() on each ListItem
+# Returns : nothing
+sub rewind {
+    my ($self, $request) = @_;
+    if ($request && $request eq 'all') {
+        $_->rewind() for $self->get_ListItems;
+    }
+    delete @{$self}{qw(_lists_it _structures_it _subitem_it _items_it)};
+    return;
+}
+
+
+
+# Iterator over the contained Items: each call returns the next Item, or
+# undef once the list is used up.
+#
+# Args : [optional] 'flatten' - iterate over get_all_Items instead of
+#        get_Items. Only the call that creates the iterator looks at this
+#        argument.
+sub next_Item {
+    my ($self, $request) = @_;
+    $self->{'_items_it'} ||= do {
+        my @queue = ($request && $request eq 'flatten')
+                  ? $self->get_all_Items
+                  : $self->get_Items;
+        sub { return shift @queue };
+    };
+    return $self->{'_items_it'}->();
+}
+
+
+# Returns the contents of the '_items' array ref, or an empty list when
+# '_items' is not a reference.
+sub get_Items {
+    my ($self) = @_;
+    return () unless ref $self->{'_items'};
+    return @{ $self->{'_items'} };
+}
+
+
+# Returns a flattened list of every contained Item: each Item from
+# get_Items, followed by each of its ListItems, each ListItem followed by
+# its StructureItems.
+#
+# The list is built on the first call and cached in '_ordered_items'.
+# A container without Items yields an empty list; previously the cache
+# stayed undefined in that case and dereferencing it was a fatal error
+# under 'use strict'.
+sub get_all_Items {
+    my $self = shift;
+    unless ($self->{'_ordered_items'}) {
+        my @ordered;
+        for my $item ($self->get_Items) {
+            push @ordered, $item;
+            for my $ls ($item->get_ListItems) {
+                push @ordered, $ls, $ls->get_StructureItems;
+            }
+        }
+        $self->{'_ordered_items'} = \@ordered;
+    }
+    return @{ $self->{'_ordered_items'} };
+}
+
+
+# Returns the names (get_name) of all Items from get_all_Items, without
+# duplicates, in order of first appearance.
+sub get_all_names {
+    my ($self) = @_;
+    my (%seen, @unique);
+    for my $name (map { $_->get_name } $self->get_all_Items) {
+        push @unique, $name unless $seen{$name}++;
+    }
+    return @unique;
+}
+
+
+# Returns every Item from get_all_Items whose get_name equals $key.
+# Returns nothing when $key is false.
+sub get_Items_by_name {
+    my ($self, $key) = @_;
+    return unless $key;
+    my @matches;
+    for my $item ($self->get_all_Items) {
+        push @matches, $item if $item->get_name eq $key;
+    }
+    return @matches;
+}
+
+
+# Returns get_content of every Item from get_all_Items whose get_name
+# equals $key. Returns nothing when $key is false.
+sub get_contents_by_name {
+    my ($self, $key) = @_;
+    return unless $key;
+    my @contents;
+    for my $item ($self->get_all_Items) {
+        next unless $item->get_name eq $key;
+        push @contents, $item->get_content;
+    }
+    return @contents;
+}
+
+
+# Returns get_type of the first Item from get_all_Items whose get_name
+# equals $key.
+#
+# Returns nothing when $key is false or when no Item has that name
+# (previously the second case was a fatal method call on undef).
+sub get_type_by_name {
+    my ($self, $key) = @_;
+    return unless $key;
+    my ($it) = grep { $_->get_name eq $key } $self->get_all_Items;
+    return unless $it;
+    return $it->get_type;
+}
+
+# private data method
+
+# Load one parsed item (hash ref) into this object.
+#
+# Each entry of $data->{Item} (dereferenced as an array ref) becomes a
+# nested Item object on _items whose datatype is the lowercased parent Type
+# plus '_item'. Type, content and Name, when defined, are stored as
+# _itemtype, _itemcontent and _itemname. When $data has an Id key, that id
+# is copied into every nested entry and stored on $self as _id.
+sub _add_data {
+    my ($self, $data) = @_;
+    my $entries = $data->{Item};
+    if ($entries) {
+        my $child_type = lc($data->{Type} . '_item');
+        $self->{'_id'} = $data->{Id} if exists $data->{Id};
+        foreach my $entry (@{$entries}) {
+            # propagate the record id down to the nested item data
+            $entry->{Id} = $data->{Id} if exists $data->{Id};
+            my $child = Bio::Tools::EUtilities::Summary::Item->new(
+                -datatype => $child_type,
+                -verbose  => $self->verbose,
+            );
+            $child->_add_data($entry);
+            push @{ $self->{'_items'} }, $child;
+        }
+    }
+    foreach my $field (qw(Type content Name)) {
+        next unless defined $data->{$field};
+        $self->{ '_item' . lc $field } = $data->{$field};
+    }
+    $self->{'_id'} = $data->{Id} if exists $data->{Id};
+}
+
+
+# recursively called to grab subitems, then layer
+
+# Render this Item, followed by all of its sub-items, as text.
+#
+# Args    : [optional] indentation level (default 0); every layer of
+#           sub-items is rendered with the level increased by 4
+# Returns : string
+sub to_string {
+    my ($self, $level) = @_;
+    $level ||= 0;
+    # this is the field length for the initial data (spaces are padded in front)
+    my $pad     = 20 - $level;
+    my $content = $self->get_content || '';
+    my $name    = $self->get_name();
+    my $wrapped = $self->_text_wrap(':', (' ' x $pad) . ':', $content);
+    my $report  = sprintf("%-*s%-*s%s\n", $level, '', $pad, $name, $wrapped);
+    for my $child ($self->get_subItems) {
+        $report .= $child->to_string(4 + $level);
+    }
+    return $report;
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Summary::Item - Simple layered object for DocSum item data.
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ # Items can be nested up to three levels at this time. These levels can be
+ # accessed via Item, ListItem, or StructureItem methods:
+
+ while (my $item = $docsum->next_Item) {
+ print "Name: ",$item->get_name,"\n";
+ print "Data: ",$item->get_content,"\n";
+ print "Type: ",$item->get_type,"\n";
+ while (my $ls = $item->next_ListItem) {
+ # do same here
+ while (my $struct = $ls->next_StructureItem) {
+ # do more stuff here
+ }
+ }
+ }
+
+=head1 DESCRIPTION
+
+DocSum data, as returned from esummary, normally is a simple list of
+item-content-content_type groups. However, items can also contain nested data to
+represent more complex data (such as structural data). Up to three nested layers
+may appear in any document summary.
+
+This class contains methods to access data that can appear in a docsum for any
+individual item as well as describes methods to traverse the hierarchy of items
+present in a document summary.
+
+The unique name for items are accessed via get_name(), the content by
+get_content() (if present), and the data type by get_type(). Items can have
+ListItems (Item objects with a datatype() 'list'), which in turn can have
+StructureItems (Item objects with a datatype of 'structure'). Items are
+initially traversed via a DocSum object using next_Item() or obtained all at
+once with get_Items(). Similarly, nested Items can be accessed by using
+next_ListItem/get_ListItems and next_StructureItem/get_StructureItems. A
+flattened list of items can be accessed with get_all_Items().
+
+=head2 new
+
+ Title : new
+ Usage :
+ Function :
+ Returns :
+ Args :
+
+=head2 get_ids
+
+ Title : get_ids
+ Usage : my ($id) = $item->get_ids;
+ Function : returns array or array ref with id
+ Returns : array or array ref
+ Args : none
+ Note : the behavior of this method is to remain consistent with other
+ implementations of get_ids(). To retrieve the single DocSum ID use
+ get_id()
+
+=head2 get_id
+
+ Title : get_id
+ Usage : my ($id) = $item->get_id;
+ Function : returns UID of record
+ Returns : integer
+ Args : none
+
+=head2 next_ListItem
+
+ Title : next_ListItem
+ Usage : while (my $ls = $item->next_ListItem) {...}
+ Function : iterates through ListItems (nested layer of Item)
+ Returns : single ListItem
+ Args : none
+
+=head2 get_ListItems
+
+ Title : get_ListItems
+ Usage : my @ls = $item->get_ListItems
+ Function : returns list of, well, List Items
+ Returns : array of List Items
+ Args : none
+
+=head2 next_StructureItem
+
+ Title : next_StructureItem
+ Usage : while (my $struc = $ls->next_StructureItem) {...}
+ Function : iterates through StructureItems (nested layer of ListItem)
+ Returns : single StructureItems
+ Args : none
+
+=head2 get_StructureItems
+
+ Title : get_StructureItems
+ Usage : my @structs = $ls->get_StructureItems
+ Function : returns list of Structure Items
+ Returns : array of StructureItems
+ Args : none
+
+=head2 next_subItem
+
+ Title : next_subItem
+ Usage : while (my $it = $ls->next_subItem) {...}
+ Function : iterates through the next layer of Items
+ Returns : single Item
+ Args : none
+ Notes : unlike next_ListItem and next_StructureItem, this generically
+ accesses any sub Items (useful for recursive calls, for example)
+
+=head2 get_subItems
+
+ Title : get_subItems
+ Usage : my @items = $ls->get_subItems
+ Function : returns list of sub Items
+ Returns : array of Items
+ Args : none
+ Notes : unlike get_ListItems and get_StructureItems, this generically
+ accesses any sub Items (useful for recursive calls, for example)
+
+=head2 get_name
+
+ Title : get_name
+ Usage : my $nm = $ls->get_name
+ Function : retrieves Item/ListItem/StructureItem name for this Item
+ Returns : string
+ Args : none
+
+=head2 get_type
+
+ Title : get_type
+ Usage : my $type = $ls->get_type
+ Function : retrieves Item/ListItem/StructureItem type
+ Returns : string
+ Args : none
+ Note : this is not the same as the datatype(), which describes the
+ group this Item object belongs to
+
+=head2 get_content
+
+ Title : get_content
+ Usage : my $data = $ls->get_content
+ Function : retrieves Item/ListItem/StructureItem content (if any)
+ Returns : string
+ Args : none
+
+=head2 rewind
+
+ Title : rewind
+ Usage : $item->rewind()
+ Function : rewinds iterators
+ Returns : none
+ Args : [optional] No arg - only rewinds current layer
+ 'all' - rewind all DocSum object layers
+ (Items, ListItems, StructureItems)
+
+=head1 ItemContainerI methods
+
+=head2 next_Item
+
+ Title : next_Item
+ Usage : while (my $item = $docsum->next_Item) {...}
+ Function : iterates through Items (nested layer of Item)
+ Returns : single Item
+ Args : [optional] single arg (string)
+ 'flatten' - iterates through a flattened list ala
+ get_all_DocSum_Items()
+
+=head2 get_Items
+
+ Title : get_Items
+ Usage : my @items = $docsum->get_Items
+ Function : returns list of, well, Items
+ Returns : array of Items
+ Args : none
+
+=head2 get_all_Items
+
+ Title : get_all_Items
+ Usage : my @items = $docsum->get_all_Items
+ Function : returns flattened list of all Item objects (Items, ListItems,
+ StructureItems)
+ Returns : array of Items
+ Args : none
+ Note : items are added top-down (similar order to using nested calls)
+ in original list order.
+
+ 1 2 7 8
+ Item - Item - Item - Item ...
+ |
+ | 3 6
+ ListItem - ListItem
+ |
+ | 4 5
+ Structure - Structure
+
+=head2 get_all_names
+
+ Title : get_all_names
+ Usage : my @names = get_all_names()
+ Function : Returns an array of names for all Item(s) in DocSum.
+ Returns : array of unique strings
+ Args : none
+
+=head2 get_Items_by_name
+
+ Title : get_Items_by_name
+ Usage : my @items = get_Items_by_name('CreateDate')
+ Function : Returns named Item(s) in DocSum (indicated by passed argument)
+ Returns : array of Item objects
+ Args : string (Item name)
+
+=head2 get_contents_by_name
+
+ Title : get_contents_by_name
+ Usage : my ($data) = get_contents_by_name('CreateDate')
+ Function : Returns content for named Item(s) in DocSum (indicated by
+ passed argument)
+ Returns : array of values (type varies per Item)
+ Args : string (Item name)
+
+=head2 get_type_by_name
+
+ Title : get_type_by_name
+ Usage : my $data = get_type_by_name('CreateDate')
+ Function : Returns data type for named Item in DocSum (indicated by
+ passed argument)
+ Returns : scalar value (string) if present
+ Args : string (Item name)
+
+=head2 to_string
+
+ Title : to_string
+ Usage : $foo->to_string()
+ Function : converts current object to string
+ Returns : none
+ Args : (optional) simple data for text formatting. This implementation
+ passes an argument for layering Items/subItems
+ Note : Used generically for debugging and print_DocSums methods
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/lib/Bio/Tools/EUtilities/Summary/ItemContainerI.pm b/lib/Bio/Tools/EUtilities/Summary/ItemContainerI.pm
new file mode 100644
index 0000000..6eafb1f
--- /dev/null
+++ b/lib/Bio/Tools/EUtilities/Summary/ItemContainerI.pm
@@ -0,0 +1,238 @@
+package Bio::Tools::EUtilities::Summary::ItemContainerI;
+our $AUTHORITY = 'cpan:BIOPERLML';
+$Bio::Tools::EUtilities::Summary::ItemContainerI::VERSION = '1.75';
+use utf8;
+use strict;
+use warnings;
+use base qw(Bio::Tools::EUtilities::EUtilDataI);
+
+# ABSTRACT: Abstract interface methods for accessing Item information from any Item-containing class. This pertains to either DocSums or to Items themselves (which can be layered).
+# AUTHOR: Chris Fields <cjfields at bioperl.org>
+# OWNER: 2006-2013 Chris Fields
+# LICENSE: Perl_5
+
+
+
+# Iterate over the contained Items, one per call.
+#
+# The iterator closure is built on the first call only and stored in
+# $self->{'_items_it'}; the $request argument is therefore only consulted
+# on that first call.  Passing 'flatten' makes the iterator walk the list
+# returned by get_all_Items, anything else walks get_Items.  Once the
+# underlying list is used up the closure keeps returning undef.
+sub next_Item {
+    my ($self, $request) = @_;
+    if (!$self->{'_items_it'}) {
+        my @queue;
+        if ($request && $request eq 'flatten') {
+            @queue = $self->get_all_Items;
+        }
+        else {
+            @queue = $self->get_Items;
+        }
+        # Closure over @queue: each invocation hands out the next element.
+        $self->{'_items_it'} = sub { return shift @queue };
+    }
+    return $self->{'_items_it'}->();
+}
+
+
+# Return the Items stored directly in this container (hierarchy intact).
+# Yields an empty list when $self->{'_items'} does not hold a reference.
+sub get_Items {
+    my ($self) = @_;
+    my $stored = $self->{'_items'};
+    return () unless ref $stored;
+    return @{$stored};
+}
+
+
+# Return a flattened list of every Item reachable from this container:
+# each top-level Item (from get_Items) is followed by its ListItems, and
+# each ListItem is followed by its StructureItems.  The flattened list is
+# computed once and cached in $self->{'_ordered_items'}.
+#
+# Returns an empty list when the container holds no Items.
+sub get_all_Items {
+    my $self = shift;
+    unless ($self->{'_ordered_items'}) {
+        # Collect into a local array and always store a reference to it,
+        # even when empty: dereferencing a never-created cache slot would
+        # die under 'use strict' for a container with no Items.
+        my @ordered;
+        for my $item ($self->get_Items) {
+            push @ordered, $item;
+            for my $ls ($item->get_ListItems) {
+                push @ordered, $ls, $ls->get_StructureItems;
+            }
+        }
+        $self->{'_ordered_items'} = \@ordered;
+    }
+    return @{ $self->{'_ordered_items'} };
+}
+
+
+# Return the distinct names of all Items in the flattened list
+# (get_all_Items), keeping the order in which each name first appears.
+sub get_all_names {
+    my $self = shift;
+    my @candidates = map { $_->get_name } $self->get_all_Items;
+    my (%seen, @unique);
+    for my $name (@candidates) {
+        next if $seen{$name}++;    # already reported this name
+        push @unique, $name;
+    }
+    return @unique;
+}
+
+
+# Return every Item (from the flattened get_all_Items list) whose name
+# equals $key.  Returns nothing when $key is missing or false.
+sub get_Items_by_name {
+    my ($self, $key) = @_;
+    return if !$key;
+    my @matches;
+    for my $item ($self->get_all_Items) {
+        push @matches, $item if $item->get_name eq $key;
+    }
+    return @matches;
+}
+
+
+# Return the content (get_content) of every Item in the flattened list
+# whose name equals $key.  Returns nothing when $key is missing or false.
+sub get_contents_by_name {
+    my ($self, $key) = @_;
+    return if !$key;
+    my @contents;
+    for my $item ($self->get_all_Items) {
+        next unless $item->get_name eq $key;
+        push @contents, $item->get_content;
+    }
+    return @contents;
+}
+
+
+# Return the data type (get_type) of the first Item in the flattened list
+# whose name equals $key.
+#
+# Returns nothing (undef in scalar context) when $key is missing or false,
+# or when no Item carries that name, rather than dying on a method call
+# against an undefined value.
+sub get_type_by_name {
+    my ($self, $key) = @_;
+    return unless $key;
+    for my $item ($self->get_all_Items) {
+        # The first match wins; stop scanning as soon as it is found.
+        return $item->get_type if $item->get_name eq $key;
+    }
+    return;    # no Item with the requested name
+}
+
+1;
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+Bio::Tools::EUtilities::Summary::ItemContainerI - Abstract interface methods for accessing Item information from any Item-containing class. This pertains to either DocSums or to Items themselves (which can be layered).
+
+=head1 VERSION
+
+version 1.75
+
+=head1 SYNOPSIS
+
+ # Implement ItemContainerI
+
+ # $foo is any ItemContainerI (current implementations are DocSum and Item itself)
+
+ while (my $item = $foo->next_Item) { # iterate through contained Items
+ # do stuff here
+ }
+
+ @items = $foo->get_Items; # all Items in the container (hierarchy intact)
+ @items = $foo->get_all_Items; # all Items in the container (flattened)
+ @items = $foo->get_Items_by_name('bar'); # Specifically named Items
+ ($content) = $foo->get_contents_by_name('bar'); # content from specific Items
+ ($type) = $foo->get_type_by_name('bar'); # data type from specific Items
+
+=head1 DESCRIPTION
+
+DocSum data, as returned from esummary, normally is a simple list of
+item-content-content_type groups. However, items can also contain nested data to
+represent more complex data (such as structural data). This interface describes
+the basic methods to generically retrieve the next layer of Item data. For
+convenience classes may describe more specific methods, but they should be
+defined in terms of this interface and its methods.
+
+=head2 next_Item
+
+ Title : next_Item
+ Usage : while (my $item = $docsum->next_Item) {...}
+ Function : iterates through Items (nested layer of Item)
+ Returns : single Item
+ Args : [optional] single arg (string)
+ 'flatten' - iterates through a flattened list as returned by
+ get_all_Items()
+
+=head2 get_Items
+
+ Title : get_Items
+ Usage : my @items = $docsum->get_Items
+ Function : returns list of, well, Items
+ Returns : array of Items
+ Args : none
+
+=head2 get_all_Items
+
+ Title : get_all_Items
+ Usage : my @items = $docsum->get_all_Items
+ Function : returns flattened list of all Item objects (Items, ListItems,
+ StructureItems)
+ Returns : array of Items
+ Args : none
+ Note : items are added top-down (similar order to using nested calls)
+ in original list order.
+
+ 1 2 7 8
+ Item - Item - Item - Item ...
+ |
+ | 3 6
+ ListItem - ListItem
+ |
+ | 4 5
+ Structure - Structure
+
+=head2 get_all_names
+
+ Title : get_all_names
+ Usage : my @names = get_all_names()
+ Function : Returns an array of names for all Item(s) in DocSum.
+ Returns : array of unique strings
+ Args : none
+
+=head2 get_Items_by_name
+
+ Title : get_Items_by_name
+ Usage : my @items = get_Items_by_name('CreateDate')
+ Function : Returns named Item(s) in DocSum (indicated by passed argument)
+ Returns : array of Item objects
+ Args : string (Item name)
+
+=head2 get_contents_by_name
+
+ Title : get_contents_by_name
+ Usage : my ($data) = $eutil->get_contents_by_name('CreateDate')
+ Function : Returns content for named Item(s) in DocSum (indicated by
+ passed argument)
+ Returns : array of values (type varies per Item)
+ Args : string (Item name)
+
+=head2 get_type_by_name
+
+ Title : get_type_by_name
+ Usage : my $data = get_type_by_name('CreateDate')
+ Function : Returns data type for named Item in DocSum (indicated by
+ passed argument)
+ Returns : scalar value (string) if present
+ Args : string (Item name)
+
+=head1 FEEDBACK
+
+=head2 Mailing lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Support
+
+Please direct usage questions or support issues to the mailing list:
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
+with code and data examples if at all possible.
+
+=head2 Reporting bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ https://github.com/bioperl/bio-eutilities/issues
+
+=head1 AUTHOR
+
+Chris Fields <cjfields at bioperl.org>
+
+=head1 COPYRIGHT
+
+This software is copyright (c) 2006-2013 by Chris Fields.
+
+This software is available under the same terms as the perl 5 programming language system itself.
+
+=cut
diff --git a/t/00-compile.t b/t/00-compile.t
new file mode 100644
index 0000000..da74d3a
--- /dev/null
+++ b/t/00-compile.t
@@ -0,0 +1,102 @@
+use 5.006;
+use strict;
+use warnings;
+
+# this test was generated with Dist::Zilla::Plugin::Test::Compile 2.054
+
+use Test::More;
+
+plan tests => 20 + ($ENV{AUTHOR_TESTING} ? 1 : 0);
+
+my @module_files = (
+ 'Bio/DB/EUtilities.pm',
+ 'Bio/Tools/EUtilities.pm',
+ 'Bio/Tools/EUtilities/EUtilDataI.pm',
+ 'Bio/Tools/EUtilities/EUtilParameters.pm',
+ 'Bio/Tools/EUtilities/History.pm',
+ 'Bio/Tools/EUtilities/HistoryI.pm',
+ 'Bio/Tools/EUtilities/Info.pm',
+ 'Bio/Tools/EUtilities/Info/FieldInfo.pm',
+ 'Bio/Tools/EUtilities/Info/LinkInfo.pm',
+ 'Bio/Tools/EUtilities/Link.pm',
+ 'Bio/Tools/EUtilities/Link/LinkSet.pm',
+ 'Bio/Tools/EUtilities/Link/UrlLink.pm',
+ 'Bio/Tools/EUtilities/Query.pm',
+ 'Bio/Tools/EUtilities/Query/GlobalQuery.pm',
+ 'Bio/Tools/EUtilities/Summary.pm',
+ 'Bio/Tools/EUtilities/Summary/DocSum.pm',
+ 'Bio/Tools/EUtilities/Summary/Item.pm',
+ 'Bio/Tools/EUtilities/Summary/ItemContainerI.pm'
+);
+
+my @scripts = (
+ 'bin/bp_einfo',
+ 'bin/bp_genbank_ref_extractor'
+);
+
+# no fake home requested
+
+my $inc_switch = -d 'blib' ? '-Mblib' : '-Ilib';
+
+use File::Spec;
+use IPC::Open3;
+use IO::Handle;
+
+open my $stdin, '<', File::Spec->devnull or die "can't open devnull: $!";
+
+my @warnings;
+for my $lib (@module_files)
+{
+ # see L<perlfaq8/How can I capture STDERR from an external command?>
+ my $stderr = IO::Handle->new;
+
+ my $pid = open3($stdin, '>&STDERR', $stderr, $^X, $inc_switch, '-e', "require q[$lib]");
+ binmode $stderr, ':crlf' if $^O eq 'MSWin32';
+ my @_warnings = <$stderr>;
+ waitpid($pid, 0);
+ is($?, 0, "$lib loaded ok");
+
+ shift @_warnings if @_warnings and $_warnings[0] =~ /^Using .*\bblib/
+ and not eval { require blib; blib->VERSION('1.01') };
+
+ if (@_warnings)
+ {
+ warn @_warnings;
+ push @warnings, @_warnings;
+ }
+}
+
+foreach my $file (@scripts)
+{ SKIP: {
+ open my $fh, '<', $file or warn("Unable to open $file: $!"), next;
+ my $line = <$fh>;
+
+ close $fh and skip("$file isn't perl", 1) unless $line =~ /^#!\s*(?:\S*perl\S*)((?:\s+-\w*)*)(?:\s*#.*)?$/;
+ my @flags = $1 ? split(' ', $1) : ();
+
+ my $stderr = IO::Handle->new;
+
+ my $pid = open3($stdin, '>&STDERR', $stderr, $^X, $inc_switch, @flags, '-c', $file);
+ binmode $stderr, ':crlf' if $^O eq 'MSWin32';
+ my @_warnings = <$stderr>;
+ waitpid($pid, 0);
+ is($?, 0, "$file compiled ok");
+
+ shift @_warnings if @_warnings and $_warnings[0] =~ /^Using .*\bblib/
+ and not eval { require blib; blib->VERSION('1.01') };
+
+ # in older perls, -c output is simply the file portion of the path being tested
+ if (@_warnings = grep { !/\bsyntax OK$/ }
+ grep { chomp; $_ ne (File::Spec->splitpath($file))[2] } @_warnings)
+ {
+ warn @_warnings;
+ push @warnings, @_warnings;
+ }
+} }
+
+
+
+is(scalar(@warnings), 0, 'no warnings found')
+ or diag 'got warnings: ', ( Test::More->can('explain') ? Test::More::explain(\@warnings) : join("\n", '', @warnings) ) if $ENV{AUTHOR_TESTING};
+
+
diff --git a/t/EUtilParameters.t b/t/EUtilParameters.t
new file mode 100644
index 0000000..c717507
--- /dev/null
+++ b/t/EUtilParameters.t
@@ -0,0 +1,60 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: esearch.t 15112 2008-12-08 18:12:38Z sendu $
+#
+
+use strict;
+use warnings;
+
+use Test::More tests => 12;
+
+use Bio::Tools::EUtilities::EUtilParameters;
+use inc::TestHelper qw(test_input_file);
+
+my @ids = qw(6679096 31543332 134288853 483581 20805941 187951953 169158074
+123228044 148676374 114326469 148707003 187952787 123233807 148694865 148694864
+148694863 148694861 148694862 8705244 8568086);
+
+my %params = (-eutil => 'efetch',
+ -db => 'nucleotide',
+ -id => \@ids,
+ -email => 'me at foo.bar',
+ -retmode => 'xml');
+
+my $pobj = Bio::Tools::EUtilities::EUtilParameters->new(%params);
+
+# initial 'primed' state
+is($pobj->parameters_changed, 1);
+
+my $request = $pobj->to_request; # 'exhaust' state
+isa_ok($request, 'HTTP::Request');
+is($request->url, 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'.
+ 'db=nucleotide&retmode=xml&id=6679096%2C31543332%2C134288853%2C483581%2C'.
+ '20805941%2C187951953%2C169158074%2C123228044%2C148676374%2C114326469%2C'.
+ '148707003%2C187952787%2C123233807%2C148694865%2C148694864%2C148694863%2C'.
+ '148694861%2C148694862%2C8705244%2C8568086&tool=BioPerl&email=me%40foo.bar');
+is($pobj->to_string(), 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'.
+ 'efetch.fcgi?db=nucleotide&retmode=xml&id=6679096%2C31543332%2C134288853%2C'.
+ '483581%2C20805941%2C187951953%2C169158074%2C123228044%2C148676374%2C'.
+ '114326469%2C148707003%2C187952787%2C123233807%2C148694865%2C148694864%2C'.
+ '148694863%2C148694861%2C148694862%2C8705244%2C8568086'.
+ '&tool=BioPerl&email=me%40foo.bar');
+is($pobj->parameters_changed, 0);
+
+# state won't change if the same parameters are passed
+$pobj->set_parameters(%params);
+is($pobj->parameters_changed, 0);
+$pobj->retmode('xml');
+is($pobj->parameters_changed, 0);
+
+# reprime state with new value
+$pobj->retmode('text');
+is($pobj->parameters_changed, 1);
+
+is(join(',',$pobj->available_parameters('epost')),
+ 'db,retmode,id,tool,email,idtype,WebEnv,query_key', 'available_parameters');
+is(join(',',$pobj->available_parameters('efetch')),
+ 'db,retmode,id,retmax,retstart,rettype,strand,seq_start,seq_stop,complexity,report,tool,email,idtype,WebEnv,query_key', 'available_parameters');
+
+my %data = $pobj->get_parameters;
+is_deeply($data{id}, $params{-id}, 'get_parameters');
+is($data{email}, $params{-email}, 'get_parameters');
diff --git a/t/author-mojibake.t b/t/author-mojibake.t
new file mode 100644
index 0000000..7678aae
--- /dev/null
+++ b/t/author-mojibake.t
@@ -0,0 +1,17 @@
+#!perl
+
+BEGIN {
+ unless ($ENV{AUTHOR_TESTING}) {
+ print "1..0 # SKIP these tests are for testing by the author\n";
+ exit
+ }
+}
+
+
+use strict;
+use warnings qw(all);
+
+use Test::More;
+use Test::Mojibake;
+
+all_files_encoding_ok();
diff --git a/t/author-pod-syntax.t b/t/author-pod-syntax.t
new file mode 100644
index 0000000..858ff45
--- /dev/null
+++ b/t/author-pod-syntax.t
@@ -0,0 +1,15 @@
+#!perl
+
+BEGIN {
+ unless ($ENV{AUTHOR_TESTING}) {
+ print "1..0 # SKIP these tests are for testing by the author\n";
+ exit
+ }
+}
+
+# This file was automatically generated by Dist::Zilla::Plugin::PodSyntaxTests.
+use strict; use warnings;
+use Test::More;
+use Test::Pod 1.41;
+
+all_pod_files_ok();
diff --git a/t/data/eutils/egquery.xml b/t/data/eutils/egquery.xml
new file mode 100644
index 0000000..e00919a
--- /dev/null
+++ b/t/data/eutils/egquery.xml
@@ -0,0 +1,262 @@
+<?xml version="1.0"?>
+<!DOCTYPE Result PUBLIC "-//NLM//DTD eSearchResult, January 2004//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/egquery.dtd">
+
+<!--
+ $Id: egquery_template.xml 106311 2007-06-26 14:46:31Z osipov $
+-->
+<!-- ================================================================= -->
+
+<Result>
+
+ <Term>Notch AND Mus musculus</Term>
+
+ <eGQueryResult>
+
+ <ResultItem>
+ <DbName>pubmed</DbName>
+ <MenuName>PubMed</MenuName>
+ <Count>1803</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>pmc</DbName>
+ <MenuName>PMC</MenuName>
+ <Count>1301</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>journals</DbName>
+ <MenuName>Journals</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>mesh</DbName>
+ <MenuName>MeSH</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>books</DbName>
+ <MenuName>Books</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>omim</DbName>
+ <MenuName>OMIM</MenuName>
+ <Count>1</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>omia</DbName>
+ <MenuName>OMIA</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>ncbisearch</DbName>
+ <MenuName>NCBI Web Site</MenuName>
+ <Count>1</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>nuccore</DbName>
+ <MenuName>Nucleotide</MenuName>
+ <Count>496</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>nucgss</DbName>
+ <MenuName>GSS</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>nucest</DbName>
+ <MenuName>EST</MenuName>
+ <Count>142</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>protein</DbName>
+ <MenuName>Protein</MenuName>
+ <Count>534</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>genome</DbName>
+ <MenuName>Genome</MenuName>
+ <Count>4</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>structure</DbName>
+ <MenuName>Structure</MenuName>
+ <Count>7</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>taxonomy</DbName>
+ <MenuName>Taxonomy</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>snp</DbName>
+ <MenuName>SNP</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>gene</DbName>
+ <MenuName>Gene</MenuName>
+ <Count>193</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>unigene</DbName>
+ <MenuName>UniGene</MenuName>
+ <Count>22</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>cdd</DbName>
+ <MenuName>Conserved Domains</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>domains</DbName>
+ <MenuName>3D Domains</MenuName>
+ <Count>19</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>unists</DbName>
+ <MenuName>UniSTS</MenuName>
+ <Count>61</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>popset</DbName>
+ <MenuName>PopSet</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>geo</DbName>
+ <MenuName>GEO Profiles</MenuName>
+ <Count>19278</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>gds</DbName>
+ <MenuName>GEO DataSets</MenuName>
+ <Count>34</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>homologene</DbName>
+ <MenuName>HomoloGene</MenuName>
+ <Count>37</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>cancerchromosomes</DbName>
+ <MenuName>CancerChromosomes</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>pccompound</DbName>
+ <MenuName>PubChem Compound</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>pcsubstance</DbName>
+ <MenuName>PubChem Substance</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>pcassay</DbName>
+ <MenuName>PubChem BioAssay</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>nlmcatalog</DbName>
+ <MenuName>NLM Catalog</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>gensat</DbName>
+ <MenuName>GENSAT</MenuName>
+ <Count>148</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>probe</DbName>
+ <MenuName>Probe</MenuName>
+ <Count>260</Count>
+ <Status>Ok</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>genomeprj</DbName>
+ <MenuName>Genome Project</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>gap</DbName>
+ <MenuName>dbGaP</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ <ResultItem>
+ <DbName>proteinclusters</DbName>
+ <MenuName>Protein Clusters</MenuName>
+ <Count>0</Count>
+ <Status>Term or Database is not found</Status>
+ </ResultItem>
+
+ </eGQueryResult>
+
+</Result>
diff --git a/t/data/eutils/einfo.xml b/t/data/eutils/einfo.xml
new file mode 100644
index 0000000..6e54d53
--- /dev/null
+++ b/t/data/eutils/einfo.xml
@@ -0,0 +1,742 @@
+<?xml version="1.0"?>
+<!DOCTYPE eInfoResult PUBLIC "-//NLM//DTD eInfoResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eInfo_020511.dtd">
+<eInfoResult>
+<DbInfo>
+ <DbName>pubmed</DbName>
+ <MenuName>PubMed</MenuName>
+ <Description>PubMed bibliographic record</Description>
+ <Count>18525976</Count>
+ <LastUpdate>2008/12/11 02:33</LastUpdate>
+ <FieldList>
+ <Field>
+ <Name>ALL</Name>
+ <FullName>All Fields</FullName>
+ <Description>All terms from all searchable fields</Description>
+ <TermCount>78295829</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>N</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>UID</Name>
+ <FullName>UID</FullName>
+ <Description>Unique number assigned to publication</Description>
+ <TermCount>0</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>Y</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>Y</IsHidden>
+ </Field>
+ <Field>
+ <Name>FILT</Name>
+ <FullName>Filter</FullName>
+ <Description>Limits the records</Description>
+ <TermCount>2950</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>TITL</Name>
+ <FullName>Title</FullName>
+ <Description>Words in title of publication</Description>
+ <TermCount>11567768</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>N</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>WORD</Name>
+ <FullName>Text Word</FullName>
+ <Description>Free text associated with publication</Description>
+ <TermCount>35440892</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>N</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>MESH</Name>
+ <FullName>MeSH Terms</FullName>
+ <Description>Medical Subject Headings assigned to publication</Description>
+ <TermCount>534667</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>Y</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>MAJR</Name>
+ <FullName>MeSH Major Topic</FullName>
+ <Description>MeSH terms of major importance to publication</Description>
+ <TermCount>475024</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>Y</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>AUTH</Name>
+ <FullName>Author</FullName>
+ <Description>Author(s) of publication</Description>
+ <TermCount>10494896</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>JOUR</Name>
+ <FullName>Journal</FullName>
+ <Description>Journal abbreviation of publication</Description>
+ <TermCount>103795</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>AFFL</Name>
+ <FullName>Affiliation</FullName>
+ <Description>Author's institutional affiliation and address</Description>
+ <TermCount>14776781</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>N</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>ECNO</Name>
+ <FullName>EC/RN Number</FullName>
+ <Description>EC number for enzyme or CAS registry number</Description>
+ <TermCount>83072</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>SUBS</Name>
+ <FullName>Substance Name</FullName>
+ <Description>CAS chemical name or MEDLINE Substance Name</Description>
+ <TermCount>190637</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>PDAT</Name>
+ <FullName>Publication Date</FullName>
+ <Description>Date of publication</Description>
+ <TermCount>28527</TermCount>
+ <IsDate>Y</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>EDAT</Name>
+ <FullName>Entrez Date</FullName>
+ <Description>Date publication first accessible through Entrez</Description>
+ <TermCount>27090</TermCount>
+ <IsDate>Y</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>VOL</Name>
+ <FullName>Volume</FullName>
+ <Description>Volume number of publication</Description>
+ <TermCount>11636</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>PAGE</Name>
+ <FullName>Pagination</FullName>
+ <Description>Page number(s) of publication</Description>
+ <TermCount>1182496</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>PTYP</Name>
+ <FullName>Publication Type</FullName>
+ <Description>Type of publication (e.g., review)</Description>
+ <TermCount>69</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>Y</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>LANG</Name>
+ <FullName>Language</FullName>
+ <Description>Language of publication</Description>
+ <TermCount>57</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>ISS</Name>
+ <FullName>Issue</FullName>
+ <Description>Issue number of publication</Description>
+ <TermCount>16440</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>SUBH</Name>
+ <FullName>MeSH Subheading</FullName>
+ <Description>Additional specificity for MeSH term</Description>
+ <TermCount>83</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>Y</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>SI</Name>
+ <FullName>Secondary Source ID</FullName>
+ <Description>Cross-reference from publication to other databases</Description>
+ <TermCount>3018329</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>MHDA</Name>
+ <FullName>MeSH Date</FullName>
+ <Description>Date publication was indexed with MeSH terms</Description>
+ <TermCount>27022</TermCount>
+ <IsDate>Y</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>TIAB</Name>
+ <FullName>Title/Abstract</FullName>
+ <Description>Free text associated with Abstract/Title</Description>
+ <TermCount>32236315</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>N</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>OTRM</Name>
+ <FullName>Other Term</FullName>
+ <Description>Other terms associated with publication</Description>
+ <TermCount>253100</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>N</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>Y</IsHidden>
+ </Field>
+ <Field>
+ <Name>INVR</Name>
+ <FullName>Investigator</FullName>
+ <Description>Investigator</Description>
+ <TermCount>229376</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>COLN</Name>
+ <FullName>Corporate Author</FullName>
+ <Description>Corporate Author of publication</Description>
+ <TermCount>111513</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>N</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>CNTY</Name>
+ <FullName>Place of Publication</FullName>
+ <Description>Country of publication</Description>
+ <TermCount>279</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>N</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>Y</IsHidden>
+ </Field>
+ <Field>
+ <Name>PAPX</Name>
+ <FullName>Pharmacological Action</FullName>
+ <Description>MeSH pharmacological action pre-explosions</Description>
+ <TermCount>408</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>GRNT</Name>
+ <FullName>Grant Number</FullName>
+ <Description>NIH Grant Numbers</Description>
+ <TermCount>2062114</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>MDAT</Name>
+ <FullName>Modification Date</FullName>
+ <Description>Date of last modification</Description>
+ <TermCount>2226</TermCount>
+ <IsDate>Y</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>Y</IsHidden>
+ </Field>
+ <Field>
+ <Name>CDAT</Name>
+ <FullName>Completion Date</FullName>
+ <Description>Date of completion</Description>
+ <TermCount>8786</TermCount>
+ <IsDate>Y</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>Y</IsHidden>
+ </Field>
+ <Field>
+ <Name>PID</Name>
+ <FullName>Publisher ID</FullName>
+ <Description>Publisher ID</Description>
+ <TermCount>6747216</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>Y</IsHidden>
+ </Field>
+ <Field>
+ <Name>FAUT</Name>
+ <FullName>First Author</FullName>
+ <Description>First Author of publication</Description>
+ <TermCount>5669708</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>FULL</Name>
+ <FullName>Full Author Name</FullName>
+ <Description>Full Author Name(s) of publication</Description>
+ <TermCount>5368997</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>FINV</Name>
+ <FullName>Full Investigator Name</FullName>
+ <Description>Full name of investigator</Description>
+ <TermCount>90832</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>TT</Name>
+ <FullName>Transliterated Title</FullName>
+ <Description>Words in transliterated title of publication</Description>
+ <TermCount>2140395</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>N</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>LAUT</Name>
+ <FullName>Last Author</FullName>
+ <Description>Last Author of publication</Description>
+ <TermCount>5383203</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>PPDT</Name>
+ <FullName>Print Publication Date</FullName>
+ <Description>Date of print publication</Description>
+ <TermCount>28485</TermCount>
+ <IsDate>Y</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>Y</IsHidden>
+ </Field>
+ <Field>
+ <Name>EPDT</Name>
+ <FullName>Electronic Publication Date</FullName>
+ <Description>Date of Electronic publication</Description>
+ <TermCount>3601</TermCount>
+ <IsDate>Y</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>Y</IsHidden>
+ </Field>
+ <Field>
+ <Name>LID</Name>
+ <FullName>Location ID</FullName>
+ <Description>ELocation ID</Description>
+ <TermCount>5531</TermCount>
+ <IsDate>N</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ <Field>
+ <Name>CRDT</Name>
+ <FullName>Create Date</FullName>
+ <Description>Date publication first accessible through Entrez</Description>
+ <TermCount>27090</TermCount>
+ <IsDate>Y</IsDate>
+ <IsNumerical>N</IsNumerical>
+ <SingleToken>Y</SingleToken>
+ <Hierarchy>N</Hierarchy>
+ <IsHidden>N</IsHidden>
+ </Field>
+ </FieldList>
+ <LinkList>
+ <Link>
+ <Name>pubmed_books_refs</Name>
+ <Menu>Cited in Books</Menu>
+ <Description>PubMed links associated with Books</Description>
+ <DbTo>books</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_cancerchromosomes</Name>
+ <Menu>CancerChrom Links</Menu>
+ <Description>Related Cancer Chromosomes</Description>
+ <DbTo>cancerchromosomes</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_cdd</Name>
+ <Menu>Domain Links</Menu>
+ <Description>Link to related CDD entry</Description>
+ <DbTo>cdd</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_domains</Name>
+ <Menu>3D Domain Links</Menu>
+ <Description>Related 3D Domains</Description>
+ <DbTo>domains</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_gap</Name>
+ <Menu>dbGaP Links</Menu>
+ <Description>Related dbGaP record</Description>
+ <DbTo>gap</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_gds</Name>
+ <Menu>GEO DataSet Links</Menu>
+ <Description>Related GEO DataSets</Description>
+ <DbTo>gds</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_gene</Name>
+ <Menu>Gene Links</Menu>
+ <Description>Link to related Genes</Description>
+ <DbTo>gene</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_gene_citedinomim</Name>
+ <Menu>Gene (OMIM) Links</Menu>
+ <Description>PubMed links to Gene derived from pubmed_omim_cited links</Description>
+ <DbTo>gene</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_gene_rif</Name>
+ <Menu>Gene (GeneRIF) Links</Menu>
+ <Description>Link to Gene for the GeneRIF subcategory</Description>
+ <DbTo>gene</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_genome</Name>
+ <Menu>Genome Links</Menu>
+ <Description>Published genome sequences</Description>
+ <DbTo>genome</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_genomeprj</Name>
+ <Menu>Project Links</Menu>
+ <Description>Related Projects</Description>
+ <DbTo>genomeprj</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_gensat</Name>
+ <Menu>GENSAT Links</Menu>
+ <Description>Related GENSAT</Description>
+ <DbTo>gensat</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_geo</Name>
+ <Menu>GEO Profile Links</Menu>
+ <Description>GEO records associated with pubmed record</Description>
+ <DbTo>geo</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_homologene</Name>
+ <Menu>HomoloGene Links</Menu>
+ <Description>Related HomoloGene</Description>
+ <DbTo>homologene</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_nuccore</Name>
+ <Menu>CoreNucleotide Links</Menu>
+ <Description>Published CoreNucleotide sequences</Description>
+ <DbTo>nuccore</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_nuccore_weighted</Name>
+ <Menu>Nucleotide (Weighted) Links</Menu>
+ <Description>Links to nuccore</Description>
+ <DbTo>nuccore</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_nucest</Name>
+ <Menu>EST Links</Menu>
+ <Description>Published EST sequences</Description>
+ <DbTo>nucest</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_nucgss</Name>
+ <Menu>GSS Links</Menu>
+ <Description>Published GSS sequences</Description>
+ <DbTo>nucgss</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_nucleotide</Name>
+ <Menu>Nucleotide Links</Menu>
+ <Description>Published nucleotide sequences</Description>
+ <DbTo>nucleotide</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_nucleotide_refseq</Name>
+ <Menu>Nucleotide (RefSeq) Links</Menu>
+ <Description>Link to Nucleotide RefSeqs</Description>
+ <DbTo>nucleotide</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_omia</Name>
+ <Menu>OMIA Links</Menu>
+ <Description>Related OMIA articles</Description>
+ <DbTo>omia</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_omim_calculated</Name>
+ <Menu>OMIM (calculated) Links</Menu>
+ <Description>OMIM (calculated) Links</Description>
+ <DbTo>omim</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_omim_cited</Name>
+ <Menu>OMIM (cited) Links</Menu>
+ <Description>OMIM (cited) Links</Description>
+ <DbTo>omim</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pcassay</Name>
+ <Menu>BioAssay Links</Menu>
+ <Description>Related PubChem BioAssay</Description>
+ <DbTo>pcassay</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pccompound</Name>
+ <Menu>Compound Links</Menu>
+ <Description>Related PubChem Compound</Description>
+ <DbTo>pccompound</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pccompound_mesh</Name>
+ <Menu>Compound (MeSH Keyword)</Menu>
+ <Description>Related PubChem Compound via MeSH</Description>
+ <DbTo>pccompound</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pccompound_publisher</Name>
+ <Menu>Compound (Publisher) Links</Menu>
+ <Description>Publisher deposited structures linked to PubChem Compound</Description>
+ <DbTo>pccompound</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pcsubstance</Name>
+ <Menu>Substance Links</Menu>
+ <Description>Related PubChem Substance</Description>
+ <DbTo>pcsubstance</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pcsubstance_mesh</Name>
+ <Menu>Substance (MeSH Keyword)</Menu>
+ <Description>Related PubChem Substance</Description>
+ <DbTo>pcsubstance</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pcsubstance_publisher</Name>
+ <Menu>Substance (Publisher) Links</Menu>
+ <Description>PubChem Substances supplied by publisher</Description>
+ <DbTo>pcsubstance</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pmc</Name>
+ <Menu>PMC Links</Menu>
+ <Description>Free full text articles in PMC</Description>
+ <DbTo>pmc</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pmc_local</Name>
+ <Menu></Menu>
+ <Description>Free full text articles in PMC</Description>
+ <DbTo>pmc</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pmc_refs</Name>
+ <Menu>Cited in PMC</Menu>
+ <Description>PubMed links associated with PMC</Description>
+ <DbTo>pmc</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_popset</Name>
+ <Menu>PopSet Links</Menu>
+ <Description>Published population set</Description>
+ <DbTo>popset</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_probe</Name>
+ <Menu>Probe Links</Menu>
+ <Description>Related Probe entry</Description>
+ <DbTo>probe</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_protein</Name>
+ <Menu>Protein Links</Menu>
+ <Description>Published protein sequences</Description>
+ <DbTo>protein</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_protein_refseq</Name>
+ <Menu>Protein (RefSeq) Links</Menu>
+ <Description>Link to Protein RefSeqs</Description>
+ <DbTo>protein</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_proteinclusters</Name>
+ <Menu>Protein Cluster Links</Menu>
+ <Description>Related Protein Clusters</Description>
+ <DbTo>proteinclusters</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pubmed</Name>
+ <Menu>Related Articles</Menu>
+ <Description>Related PubMed articles, obtained by matching text and MeSH terms</Description>
+ <DbTo>pubmed</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_pubmed_refs</Name>
+ <Menu>Cited Articles</Menu>
+ <Description>Referenced PubMed articles</Description>
+ <DbTo>pubmed</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_snp</Name>
+ <Menu>SNP Links</Menu>
+ <Description>PubMed to SNP links</Description>
+ <DbTo>snp</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_snp_cited</Name>
+ <Menu>SNP (Cited)</Menu>
+ <Description>Related SNP (Cited) records</Description>
+ <DbTo>snp</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_structure</Name>
+ <Menu>Structure Links</Menu>
+ <Description>Published 3D structures</Description>
+ <DbTo>structure</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_taxonomy_entrez</Name>
+ <Menu>Taxonomy via GenBank</Menu>
+ <Description>Related Taxonomy entry computed using other Entrez links</Description>
+ <DbTo>taxonomy</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_unigene</Name>
+ <Menu>UniGene Links</Menu>
+ <Description>Related UniGene</Description>
+ <DbTo>unigene</DbTo>
+ </Link>
+ <Link>
+ <Name>pubmed_unists</Name>
+ <Menu>UniSTS Links</Menu>
+ <Description>Related UniSTS records</Description>
+ <DbTo>unists</DbTo>
+ </Link>
+ </LinkList>
+</DbInfo>
+</eInfoResult>
diff --git a/t/data/eutils/einfo_dbs.xml b/t/data/eutils/einfo_dbs.xml
new file mode 100644
index 0000000..46cdd69
--- /dev/null
+++ b/t/data/eutils/einfo_dbs.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+<!DOCTYPE eInfoResult PUBLIC "-//NLM//DTD eInfoResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eInfo_020511.dtd">
+<eInfoResult>
+<DbList>
+ <DbName>pubmed</DbName>
+ <DbName>protein</DbName>
+ <DbName>nucleotide</DbName>
+ <DbName>nuccore</DbName>
+ <DbName>nucgss</DbName>
+ <DbName>nucest</DbName>
+ <DbName>structure</DbName>
+ <DbName>genome</DbName>
+ <DbName>books</DbName>
+ <DbName>cancerchromosomes</DbName>
+ <DbName>cdd</DbName>
+ <DbName>gap</DbName>
+ <DbName>domains</DbName>
+ <DbName>gene</DbName>
+ <DbName>genomeprj</DbName>
+ <DbName>gensat</DbName>
+ <DbName>geo</DbName>
+ <DbName>gds</DbName>
+ <DbName>homologene</DbName>
+ <DbName>journals</DbName>
+ <DbName>mesh</DbName>
+ <DbName>ncbisearch</DbName>
+ <DbName>nlmcatalog</DbName>
+ <DbName>omia</DbName>
+ <DbName>omim</DbName>
+ <DbName>pmc</DbName>
+ <DbName>popset</DbName>
+ <DbName>probe</DbName>
+ <DbName>proteinclusters</DbName>
+ <DbName>pcassay</DbName>
+ <DbName>pccompound</DbName>
+ <DbName>pcsubstance</DbName>
+ <DbName>snp</DbName>
+ <DbName>taxonomy</DbName>
+ <DbName>toolkit</DbName>
+ <DbName>unigene</DbName>
+ <DbName>unists</DbName>
+</DbList>
+</eInfoResult>
diff --git a/t/data/eutils/elink_acheck.xml b/t/data/eutils/elink_acheck.xml
new file mode 100644
index 0000000..ff770ea
--- /dev/null
+++ b/t/data/eutils/elink_acheck.xml
@@ -0,0 +1,439 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <IdLinkSet>
+ <Id>730439</Id>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd</LinkName>
+ <MenuTag>Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_concise_2</LinkName>
+ <MenuTag>Concise Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Conserved Domains</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/cdd/wrpsb.cgi?INPUT_TYPE=precalc&SEQUENCE=<@UID@>]]></Url>
+ <Priority>55</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pmc</DbTo>
+ <LinkName>protein_pmc</LinkName>
+ <MenuTag>PMC Links</MenuTag>
+ <HtmlTag>Full text in PMC</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein</LinkName>
+ <MenuTag>Related Sequences</MenuTag>
+ <HtmlTag>Related Sequences</HtmlTag>
+ <Priority>140</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_cdart_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Domain Relatives</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/lexington/lexington.cgi?cmd=prot&uid=<@UID@>]]></Url>
+ <Priority>150</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_identical</LinkName>
+ <MenuTag>Identical Proteins</MenuTag>
+ <HtmlTag>Identical Proteins</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <MenuTag>PubMed Links</MenuTag>
+ <HtmlTag>PubMed</HtmlTag>
+ <Priority>185</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>structure</DbTo>
+ <LinkName>protein_structure_related</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Related Structure</HtmlTag>
+ <Url><![CDATA[http://structure.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?client=entrez&query_gi=<@UID@>]]></Url>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <MenuTag>Taxonomy Links</MenuTag>
+ <HtmlTag>Taxonomy</HtmlTag>
+ <Priority>200</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>LinkOut</DbTo>
+ <LinkName>ExternalLink</LinkName>
+ <MenuTag>LinkOut</MenuTag>
+ <HtmlTag>LinkOut</HtmlTag>
+ <Priority>255</Priority>
+ </LinkInfo>
+ </IdLinkSet>
+ <IdLinkSet>
+ <Id>68536103</Id>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd</LinkName>
+ <MenuTag>Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_concise_2</LinkName>
+ <MenuTag>Concise Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Conserved Domains</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/cdd/wrpsb.cgi?INPUT_TYPE=precalc&SEQUENCE=<@UID@>]]></Url>
+ <Priority>55</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>gene</DbTo>
+ <LinkName>protein_gene</LinkName>
+ <MenuTag>Gene Links</MenuTag>
+ <HtmlTag>Gene</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>genome</DbTo>
+ <LinkName>protein_genome</LinkName>
+ <MenuTag>Genome Links</MenuTag>
+ <HtmlTag>Genome</HtmlTag>
+ <Priority>160</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>genomeprj</DbTo>
+ <LinkName>protein_genomeprj</LinkName>
+ <MenuTag>Genome Project Links</MenuTag>
+ <HtmlTag>Genome Project</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>nuccore</DbTo>
+ <LinkName>protein_nuccore</LinkName>
+ <MenuTag>CoreNucleotide Links</MenuTag>
+ <HtmlTag>CoreNucleotide</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pmc</DbTo>
+ <LinkName>protein_pmc</LinkName>
+ <MenuTag>PMC Links</MenuTag>
+ <HtmlTag>Full text in PMC</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein</LinkName>
+ <MenuTag>Related Sequences</MenuTag>
+ <HtmlTag>Related Sequences</HtmlTag>
+ <Priority>140</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_cdart_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Domain Relatives</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/lexington/lexington.cgi?cmd=prot&uid=<@UID@>]]></Url>
+ <Priority>150</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_identical</LinkName>
+ <MenuTag>Identical Proteins</MenuTag>
+ <HtmlTag>Identical Proteins</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>proteinclusters</DbTo>
+ <LinkName>protein_proteinclusters</LinkName>
+ <MenuTag>Protein Cluster Links</MenuTag>
+ <HtmlTag>Protein Clusters</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <MenuTag>PubMed Links</MenuTag>
+ <HtmlTag>PubMed</HtmlTag>
+ <Priority>185</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed_refseq</LinkName>
+ <MenuTag>PubMed (RefSeq) Links</MenuTag>
+ <HtmlTag>PubMed (RefSeq)</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>structure</DbTo>
+ <LinkName>protein_structure_related</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Related Structure</HtmlTag>
+ <Url><![CDATA[http://structure.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?client=entrez&query_gi=<@UID@>]]></Url>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <MenuTag>Taxonomy Links</MenuTag>
+ <HtmlTag>Taxonomy</HtmlTag>
+ <Priority>200</Priority>
+ </LinkInfo>
+ </IdLinkSet>
+ <IdLinkSet>
+ <Id>1621261</Id>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd</LinkName>
+ <MenuTag>Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_concise_2</LinkName>
+ <MenuTag>Concise Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Conserved Domains</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/cdd/wrpsb.cgi?INPUT_TYPE=precalc&SEQUENCE=<@UID@>]]></Url>
+ <Priority>55</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>gene</DbTo>
+ <LinkName>protein_gene</LinkName>
+ <MenuTag>Gene Links</MenuTag>
+ <HtmlTag>Gene</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>genomeprj</DbTo>
+ <LinkName>protein_genomeprj</LinkName>
+ <MenuTag>Genome Project Links</MenuTag>
+ <HtmlTag>Genome Project</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>nuccore</DbTo>
+ <LinkName>protein_nuccore</LinkName>
+ <MenuTag>CoreNucleotide Links</MenuTag>
+ <HtmlTag>CoreNucleotide</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein</LinkName>
+ <MenuTag>Related Sequences</MenuTag>
+ <HtmlTag>Related Sequences</HtmlTag>
+ <Priority>140</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_cdart_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Domain Relatives</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/lexington/lexington.cgi?cmd=prot&uid=<@UID@>]]></Url>
+ <Priority>150</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_identical</LinkName>
+ <MenuTag>Identical Proteins</MenuTag>
+ <HtmlTag>Identical Proteins</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <MenuTag>PubMed Links</MenuTag>
+ <HtmlTag>PubMed</HtmlTag>
+ <Priority>185</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>structure</DbTo>
+ <LinkName>protein_structure_related</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Related Structure</HtmlTag>
+ <Url><![CDATA[http://structure.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?client=entrez&query_gi=<@UID@>]]></Url>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <MenuTag>Taxonomy Links</MenuTag>
+ <HtmlTag>Taxonomy</HtmlTag>
+ <Priority>200</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>LinkOut</DbTo>
+ <LinkName>ExternalLink</LinkName>
+ <MenuTag>LinkOut</MenuTag>
+ <HtmlTag>LinkOut</HtmlTag>
+ <Priority>255</Priority>
+ </LinkInfo>
+ </IdLinkSet>
+ <IdLinkSet>
+ <Id>20807972</Id>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd</LinkName>
+ <MenuTag>Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_concise_2</LinkName>
+ <MenuTag>Concise Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Conserved Domains</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/cdd/wrpsb.cgi?INPUT_TYPE=precalc&SEQUENCE=<@UID@>]]></Url>
+ <Priority>55</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>gene</DbTo>
+ <LinkName>protein_gene</LinkName>
+ <MenuTag>Gene Links</MenuTag>
+ <HtmlTag>Gene</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>genome</DbTo>
+ <LinkName>protein_genome</LinkName>
+ <MenuTag>Genome Links</MenuTag>
+ <HtmlTag>Genome</HtmlTag>
+ <Priority>160</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>genomeprj</DbTo>
+ <LinkName>protein_genomeprj</LinkName>
+ <MenuTag>Genome Project Links</MenuTag>
+ <HtmlTag>Genome Project</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>nuccore</DbTo>
+ <LinkName>protein_nuccore</LinkName>
+ <MenuTag>CoreNucleotide Links</MenuTag>
+ <HtmlTag>CoreNucleotide</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pmc</DbTo>
+ <LinkName>protein_pmc</LinkName>
+ <MenuTag>PMC Links</MenuTag>
+ <HtmlTag>Full text in PMC</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein</LinkName>
+ <MenuTag>Related Sequences</MenuTag>
+ <HtmlTag>Related Sequences</HtmlTag>
+ <Priority>140</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_cdart_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Domain Relatives</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/lexington/lexington.cgi?cmd=prot&uid=<@UID@>]]></Url>
+ <Priority>150</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_identical</LinkName>
+ <MenuTag>Identical Proteins</MenuTag>
+ <HtmlTag>Identical Proteins</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_refseq2uniprot</LinkName>
+ <MenuTag>Protein (UniProtKB)</MenuTag>
+ <HtmlTag>Protein (UniProtKB)</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>proteinclusters</DbTo>
+ <LinkName>protein_proteinclusters</LinkName>
+ <MenuTag>Protein Cluster Links</MenuTag>
+ <HtmlTag>Protein Clusters</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <MenuTag>PubMed Links</MenuTag>
+ <HtmlTag>PubMed</HtmlTag>
+ <Priority>185</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed_refseq</LinkName>
+ <MenuTag>PubMed (RefSeq) Links</MenuTag>
+ <HtmlTag>PubMed (RefSeq)</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>structure</DbTo>
+ <LinkName>protein_structure_related</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Related Structure</HtmlTag>
+ <Url><![CDATA[http://structure.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?client=entrez&query_gi=<@UID@>]]></Url>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <MenuTag>Taxonomy Links</MenuTag>
+ <HtmlTag>Taxonomy</HtmlTag>
+ <Priority>200</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>LinkOut</DbTo>
+ <LinkName>ExternalLink</LinkName>
+ <MenuTag>LinkOut</MenuTag>
+ <HtmlTag>LinkOut</HtmlTag>
+ <Priority>255</Priority>
+ </LinkInfo>
+ </IdLinkSet>
+ </IdCheckList>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_acheck_corr.xml b/t/data/eutils/elink_acheck_corr.xml
new file mode 100644
index 0000000..54218e5
--- /dev/null
+++ b/t/data/eutils/elink_acheck_corr.xml
@@ -0,0 +1,454 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <IdLinkSet>
+ <Id>1621261</Id>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd</LinkName>
+ <MenuTag>Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_concise_2</LinkName>
+ <MenuTag>Concise Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Conserved Domains</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/cdd/wrpsb.cgi?INPUT_TYPE=precalc&SEQUENCE=<@UID@>]]></Url>
+ <Priority>55</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>gene</DbTo>
+ <LinkName>protein_gene</LinkName>
+ <MenuTag>Gene Links</MenuTag>
+ <HtmlTag>Gene</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>genomeprj</DbTo>
+ <LinkName>protein_genomeprj</LinkName>
+ <MenuTag>Genome Project Links</MenuTag>
+ <HtmlTag>Genome Project</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>nuccore</DbTo>
+ <LinkName>protein_nuccore</LinkName>
+ <MenuTag>CoreNucleotide Links</MenuTag>
+ <HtmlTag>CoreNucleotide</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein</LinkName>
+ <MenuTag>Related Sequences</MenuTag>
+ <HtmlTag>Related Sequences</HtmlTag>
+ <Priority>140</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_cdart_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Domain Relatives</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/lexington/lexington.cgi?cmd=prot&uid=<@UID@>]]></Url>
+ <Priority>150</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_identical</LinkName>
+ <MenuTag>Identical Proteins</MenuTag>
+ <HtmlTag>Identical Proteins</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <MenuTag>PubMed Links</MenuTag>
+ <HtmlTag>PubMed</HtmlTag>
+ <Priority>185</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>structure</DbTo>
+ <LinkName>protein_structure_related</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Related Structure</HtmlTag>
+ <Url><![CDATA[http://structure.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?client=entrez&query_gi=<@UID@>]]></Url>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <MenuTag>Taxonomy Links</MenuTag>
+ <HtmlTag>Taxonomy</HtmlTag>
+ <Priority>200</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>LinkOut</DbTo>
+ <LinkName>ExternalLink</LinkName>
+ <MenuTag>LinkOut</MenuTag>
+ <HtmlTag>LinkOut</HtmlTag>
+ <Priority>255</Priority>
+ </LinkInfo>
+ </IdLinkSet>
+ </IdCheckList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <IdLinkSet>
+ <Id>68536103</Id>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd</LinkName>
+ <MenuTag>Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_concise_2</LinkName>
+ <MenuTag>Concise Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Conserved Domains</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/cdd/wrpsb.cgi?INPUT_TYPE=precalc&SEQUENCE=<@UID@>]]></Url>
+ <Priority>55</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>gene</DbTo>
+ <LinkName>protein_gene</LinkName>
+ <MenuTag>Gene Links</MenuTag>
+ <HtmlTag>Gene</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>genome</DbTo>
+ <LinkName>protein_genome</LinkName>
+ <MenuTag>Genome Links</MenuTag>
+ <HtmlTag>Genome</HtmlTag>
+ <Priority>160</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>genomeprj</DbTo>
+ <LinkName>protein_genomeprj</LinkName>
+ <MenuTag>Genome Project Links</MenuTag>
+ <HtmlTag>Genome Project</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>nuccore</DbTo>
+ <LinkName>protein_nuccore</LinkName>
+ <MenuTag>CoreNucleotide Links</MenuTag>
+ <HtmlTag>CoreNucleotide</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pmc</DbTo>
+ <LinkName>protein_pmc</LinkName>
+ <MenuTag>PMC Links</MenuTag>
+ <HtmlTag>Full text in PMC</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein</LinkName>
+ <MenuTag>Related Sequences</MenuTag>
+ <HtmlTag>Related Sequences</HtmlTag>
+ <Priority>140</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_cdart_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Domain Relatives</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/lexington/lexington.cgi?cmd=prot&uid=<@UID@>]]></Url>
+ <Priority>150</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_identical</LinkName>
+ <MenuTag>Identical Proteins</MenuTag>
+ <HtmlTag>Identical Proteins</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>proteinclusters</DbTo>
+ <LinkName>protein_proteinclusters</LinkName>
+ <MenuTag>Protein Cluster Links</MenuTag>
+ <HtmlTag>Protein Clusters</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <MenuTag>PubMed Links</MenuTag>
+ <HtmlTag>PubMed</HtmlTag>
+ <Priority>185</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed_refseq</LinkName>
+ <MenuTag>PubMed (RefSeq) Links</MenuTag>
+ <HtmlTag>PubMed (RefSeq)</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>structure</DbTo>
+ <LinkName>protein_structure_related</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Related Structure</HtmlTag>
+ <Url><![CDATA[http://structure.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?client=entrez&query_gi=<@UID@>]]></Url>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <MenuTag>Taxonomy Links</MenuTag>
+ <HtmlTag>Taxonomy</HtmlTag>
+ <Priority>200</Priority>
+ </LinkInfo>
+ </IdLinkSet>
+ </IdCheckList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <IdLinkSet>
+ <Id>20807972</Id>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd</LinkName>
+ <MenuTag>Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_concise_2</LinkName>
+ <MenuTag>Concise Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Conserved Domains</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/cdd/wrpsb.cgi?INPUT_TYPE=precalc&SEQUENCE=<@UID@>]]></Url>
+ <Priority>55</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>gene</DbTo>
+ <LinkName>protein_gene</LinkName>
+ <MenuTag>Gene Links</MenuTag>
+ <HtmlTag>Gene</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>genome</DbTo>
+ <LinkName>protein_genome</LinkName>
+ <MenuTag>Genome Links</MenuTag>
+ <HtmlTag>Genome</HtmlTag>
+ <Priority>160</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>genomeprj</DbTo>
+ <LinkName>protein_genomeprj</LinkName>
+ <MenuTag>Genome Project Links</MenuTag>
+ <HtmlTag>Genome Project</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>nuccore</DbTo>
+ <LinkName>protein_nuccore</LinkName>
+ <MenuTag>CoreNucleotide Links</MenuTag>
+ <HtmlTag>CoreNucleotide</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pmc</DbTo>
+ <LinkName>protein_pmc</LinkName>
+ <MenuTag>PMC Links</MenuTag>
+ <HtmlTag>Full text in PMC</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein</LinkName>
+ <MenuTag>Related Sequences</MenuTag>
+ <HtmlTag>Related Sequences</HtmlTag>
+ <Priority>140</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_cdart_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Domain Relatives</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/lexington/lexington.cgi?cmd=prot&uid=<@UID@>]]></Url>
+ <Priority>150</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_identical</LinkName>
+ <MenuTag>Identical Proteins</MenuTag>
+ <HtmlTag>Identical Proteins</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_refseq2uniprot</LinkName>
+ <MenuTag>Protein (UniProtKB)</MenuTag>
+ <HtmlTag>Protein (UniProtKB)</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>proteinclusters</DbTo>
+ <LinkName>protein_proteinclusters</LinkName>
+ <MenuTag>Protein Cluster Links</MenuTag>
+ <HtmlTag>Protein Clusters</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <MenuTag>PubMed Links</MenuTag>
+ <HtmlTag>PubMed</HtmlTag>
+ <Priority>185</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed_refseq</LinkName>
+ <MenuTag>PubMed (RefSeq) Links</MenuTag>
+ <HtmlTag>PubMed (RefSeq)</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>structure</DbTo>
+ <LinkName>protein_structure_related</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Related Structure</HtmlTag>
+ <Url><![CDATA[http://structure.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?client=entrez&query_gi=<@UID@>]]></Url>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <MenuTag>Taxonomy Links</MenuTag>
+ <HtmlTag>Taxonomy</HtmlTag>
+ <Priority>200</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>LinkOut</DbTo>
+ <LinkName>ExternalLink</LinkName>
+ <MenuTag>LinkOut</MenuTag>
+ <HtmlTag>LinkOut</HtmlTag>
+ <Priority>255</Priority>
+ </LinkInfo>
+ </IdLinkSet>
+ </IdCheckList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <IdLinkSet>
+ <Id>730439</Id>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd</LinkName>
+ <MenuTag>Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_concise_2</LinkName>
+ <MenuTag>Concise Conserved Domain Links</MenuTag>
+ <HtmlTag></HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>cdd</DbTo>
+ <LinkName>protein_cdd_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Conserved Domains</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/cdd/wrpsb.cgi?INPUT_TYPE=precalc&SEQUENCE=<@UID@>]]></Url>
+ <Priority>55</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pmc</DbTo>
+ <LinkName>protein_pmc</LinkName>
+ <MenuTag>PMC Links</MenuTag>
+ <HtmlTag>Full text in PMC</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein</LinkName>
+ <MenuTag>Related Sequences</MenuTag>
+ <HtmlTag>Related Sequences</HtmlTag>
+ <Priority>140</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_cdart_summary</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Domain Relatives</HtmlTag>
+ <Url><![CDATA[http://www.ncbi.nlm.nih.gov/Structure/lexington/lexington.cgi?cmd=prot&uid=<@UID@>]]></Url>
+ <Priority>150</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_identical</LinkName>
+ <MenuTag>Identical Proteins</MenuTag>
+ <HtmlTag>Identical Proteins</HtmlTag>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <MenuTag>PubMed Links</MenuTag>
+ <HtmlTag>PubMed</HtmlTag>
+ <Priority>185</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>structure</DbTo>
+ <LinkName>protein_structure_related</LinkName>
+ <MenuTag></MenuTag>
+ <HtmlTag>Related Structure</HtmlTag>
+ <Url><![CDATA[http://structure.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi?client=entrez&query_gi=<@UID@>]]></Url>
+ <Priority>128</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <MenuTag>Taxonomy Links</MenuTag>
+ <HtmlTag>Taxonomy</HtmlTag>
+ <Priority>200</Priority>
+ </LinkInfo>
+ <LinkInfo>
+ <DbTo>LinkOut</DbTo>
+ <LinkName>ExternalLink</LinkName>
+ <MenuTag>LinkOut</MenuTag>
+ <HtmlTag>LinkOut</HtmlTag>
+ <Priority>255</Priority>
+ </LinkInfo>
+ </IdLinkSet>
+ </IdCheckList>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_dball.xml b/t/data/eutils/elink_dball.xml
new file mode 100644
index 0000000..834dbe8
--- /dev/null
+++ b/t/data/eutils/elink_dball.xml
@@ -0,0 +1,116 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>15622530</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>gene</DbTo>
+ <LinkName>protein_gene</LinkName>
+ <Link>
+ <Id>1459485</Id>
+ </Link>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>genomeprj</DbTo>
+ <LinkName>protein_genomeprj</LinkName>
+ <Link>
+ <Id>246</Id>
+ </Link>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>genomeprj</DbTo>
+ <LinkName>protein_genomeprj_insdc</LinkName>
+ <Info>Empty result</Info>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>nuccore</DbTo>
+ <LinkName>protein_nuccore</LinkName>
+ <Link>
+ <Id>47118305</Id>
+ </Link>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>nucest</DbTo>
+ <LinkName>protein_nucest_mgc</LinkName>
+ <Info>Empty result</Info>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>nucgss</DbTo>
+ <LinkName>protein_nucgss_mgc</LinkName>
+ <Info>Empty result</Info>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein</LinkName>
+ <Link>
+ <Id>15622530</Id>
+ <Score>2147483647</Score>
+ </Link>
+ <Link>
+ <Id>15921743</Id>
+ <Score>381</Score>
+ </Link>
+ <Link>
+ <Id>70607303</Id>
+ <Score>178</Score>
+ </Link>
+ <Link>
+ <Id>68567951</Id>
+ <Score>178</Score>
+ </Link>
+ <Link>
+ <Id>145702933</Id>
+ <Score>161</Score>
+ </Link>
+ <Link>
+ <Id>146304683</Id>
+ <Score>161</Score>
+ </Link>
+ <Link>
+ <Id>6015889</Id>
+ <Score>142</Score>
+ </Link>
+ <Link>
+ <Id>13813749</Id>
+ <Score>142</Score>
+ </Link>
+ <Link>
+ <Id>15897502</Id>
+ <Score>142</Score>
+ </Link>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_identical</LinkName>
+ <Link>
+ <Id>15622530</Id>
+ <Score>2147483647</Score>
+ </Link>
+ <Link>
+ <Id>74573864</Id>
+ <Score>0</Score>
+ </Link>
+ <Link>
+ <Id>15921743</Id>
+ <Score>0</Score>
+ </Link>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <Link>
+ <Id>11572479</Id>
+ </Link>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <Link>
+ <Id>273063</Id>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_lcheck.xml b/t/data/eutils/elink_lcheck.xml
new file mode 100644
index 0000000..c2094e7
--- /dev/null
+++ b/t/data/eutils/elink_lcheck.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <Id HasLinkOut="Y">730439</Id>
+ <Id HasLinkOut="N">68536103</Id>
+ <Id HasLinkOut="Y">1621261</Id>
+ <Id HasLinkOut="Y">20807972</Id>
+ </IdCheckList>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_lcheck_corr.xml b/t/data/eutils/elink_lcheck_corr.xml
new file mode 100644
index 0000000..00d600f
--- /dev/null
+++ b/t/data/eutils/elink_lcheck_corr.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <Id HasLinkOut="Y">1621261</Id>
+ </IdCheckList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <Id HasLinkOut="N">68536103</Id>
+ </IdCheckList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <Id HasLinkOut="Y">20807972</Id>
+ </IdCheckList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <Id HasLinkOut="Y">730439</Id>
+ </IdCheckList>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_llinks.xml b/t/data/eutils/elink_llinks.xml
new file mode 100644
index 0000000..ae1a0b2
--- /dev/null
+++ b/t/data/eutils/elink_llinks.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdUrlList>
+ <IdUrlSet>
+ <Id>730439</Id>
+ <ObjUrl>
+ <Url>http://salilab.org/modbase/search?databaseID=730439</Url>
+ <IconUrl>http://www.ncbi.nlm.nih.gov/entrez/query/egifs/http:--salilab.org-modbase-img-modbase.jpg</IconUrl>
+ <SubjectType>structure</SubjectType>
+ <Attribute>registration required</Attribute>
+ <Provider>
+ <Name>MODBASE, Database of Comparative Protein Structure Models (Sali Lab/UCSF)</Name>
+ <NameAbbr>modbase</NameAbbr>
+ <Id>5447</Id>
+ <Url>http://salilab.org/modbase</Url>
+ <IconUrl>http://salilab.org/modbase/img/modbase.jpg</IconUrl>
+ </Provider>
+ </ObjUrl>
+ </IdUrlSet>
+ <IdUrlSet>
+ <Id>68536103</Id>
+ <Info>No links</Info>
+ </IdUrlSet>
+ <IdUrlSet>
+ <Id>1621261</Id>
+ <ObjUrl>
+ <Url>http://salilab.org/modbase/search?databaseID=1621261</Url>
+ <IconUrl>http://www.ncbi.nlm.nih.gov/entrez/query/egifs/http:--salilab.org-modbase-img-modbase.jpg</IconUrl>
+ <SubjectType>structure</SubjectType>
+ <Attribute>registration required</Attribute>
+ <Provider>
+ <Name>MODBASE, Database of Comparative Protein Structure Models (Sali Lab/UCSF)</Name>
+ <NameAbbr>modbase</NameAbbr>
+ <Id>5447</Id>
+ <Url>http://salilab.org/modbase</Url>
+ <IconUrl>http://salilab.org/modbase/img/modbase.jpg</IconUrl>
+ </Provider>
+ </ObjUrl>
+ </IdUrlSet>
+ <IdUrlSet>
+ <Id>20807972</Id>
+ <ObjUrl>
+ <Url>http://salilab.org/modbase/search?databaseID=20807972</Url>
+ <IconUrl>http://www.ncbi.nlm.nih.gov/entrez/query/egifs/http:--salilab.org-modbase-img-modbase.jpg</IconUrl>
+ <SubjectType>structure</SubjectType>
+ <Attribute>registration required</Attribute>
+ <Provider>
+ <Name>MODBASE, Database of Comparative Protein Structure Models (Sali Lab/UCSF)</Name>
+ <NameAbbr>modbase</NameAbbr>
+ <Id>5447</Id>
+ <Url>http://salilab.org/modbase</Url>
+ <IconUrl>http://salilab.org/modbase/img/modbase.jpg</IconUrl>
+ </Provider>
+ </ObjUrl>
+ </IdUrlSet>
+ </IdUrlList>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_llinks_corr.xml b/t/data/eutils/elink_llinks_corr.xml
new file mode 100644
index 0000000..0a2e0d7
--- /dev/null
+++ b/t/data/eutils/elink_llinks_corr.xml
@@ -0,0 +1,76 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdUrlList>
+ <IdUrlSet>
+ <Id>1621261</Id>
+ <ObjUrl>
+ <Url>http://salilab.org/modbase/search?databaseID=1621261</Url>
+ <IconUrl>http://www.ncbi.nlm.nih.gov/entrez/query/egifs/http:--salilab.org-modbase-img-modbase.jpg</IconUrl>
+ <SubjectType>structure</SubjectType>
+ <Attribute>registration required</Attribute>
+ <Provider>
+ <Name>MODBASE, Database of Comparative Protein Structure Models (Sali Lab/UCSF)</Name>
+ <NameAbbr>modbase</NameAbbr>
+ <Id>5447</Id>
+ <Url>http://salilab.org/modbase</Url>
+ <IconUrl>http://salilab.org/modbase/img/modbase.jpg</IconUrl>
+ </Provider>
+ </ObjUrl>
+ </IdUrlSet>
+ </IdUrlList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdUrlList>
+ <IdUrlSet>
+ <Id>68536103</Id>
+ <Info>No links</Info>
+ </IdUrlSet>
+ </IdUrlList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdUrlList>
+ <IdUrlSet>
+ <Id>20807972</Id>
+ <ObjUrl>
+ <Url>http://salilab.org/modbase/search?databaseID=20807972</Url>
+ <IconUrl>http://www.ncbi.nlm.nih.gov/entrez/query/egifs/http:--salilab.org-modbase-img-modbase.jpg</IconUrl>
+ <SubjectType>structure</SubjectType>
+ <Attribute>registration required</Attribute>
+ <Provider>
+ <Name>MODBASE, Database of Comparative Protein Structure Models (Sali Lab/UCSF)</Name>
+ <NameAbbr>modbase</NameAbbr>
+ <Id>5447</Id>
+ <Url>http://salilab.org/modbase</Url>
+ <IconUrl>http://salilab.org/modbase/img/modbase.jpg</IconUrl>
+ </Provider>
+ </ObjUrl>
+ </IdUrlSet>
+ </IdUrlList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdUrlList>
+ <IdUrlSet>
+ <Id>730439</Id>
+ <ObjUrl>
+ <Url>http://salilab.org/modbase/search?databaseID=730439</Url>
+ <IconUrl>http://www.ncbi.nlm.nih.gov/entrez/query/egifs/http:--salilab.org-modbase-img-modbase.jpg</IconUrl>
+ <SubjectType>structure</SubjectType>
+ <Attribute>registration required</Attribute>
+ <Provider>
+ <Name>MODBASE, Database of Comparative Protein Structure Models (Sali Lab/UCSF)</Name>
+ <NameAbbr>modbase</NameAbbr>
+ <Id>5447</Id>
+ <Url>http://salilab.org/modbase</Url>
+ <IconUrl>http://salilab.org/modbase/img/modbase.jpg</IconUrl>
+ </Provider>
+ </ObjUrl>
+ </IdUrlSet>
+ </IdUrlList>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_multidb.xml b/t/data/eutils/elink_multidb.xml
new file mode 100644
index 0000000..189acb9
--- /dev/null
+++ b/t/data/eutils/elink_multidb.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>730439</Id>
+ <Id>68536103</Id>
+ <Id>1621261</Id>
+ <Id>20807972</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <Link>
+ <Id>306537</Id>
+ </Link>
+ <Link>
+ <Id>273068</Id>
+ </Link>
+ <Link>
+ <Id>83332</Id>
+ </Link>
+ <Link>
+ <Id>1394</Id>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_multidb_corr.xml b/t/data/eutils/elink_multidb_corr.xml
new file mode 100644
index 0000000..c404a45
--- /dev/null
+++ b/t/data/eutils/elink_multidb_corr.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>1621261</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <Link>
+ <Id>83332</Id>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>68536103</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <Link>
+ <Id>306537</Id>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>20807972</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <Link>
+ <Id>273068</Id>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>730439</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>taxonomy</DbTo>
+ <LinkName>protein_taxonomy</LinkName>
+ <Link>
+ <Id>1394</Id>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_ncheck.xml b/t/data/eutils/elink_ncheck.xml
new file mode 100644
index 0000000..e1d7122
--- /dev/null
+++ b/t/data/eutils/elink_ncheck.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <Id HasNeighbor="Y">730439</Id>
+ <Id HasNeighbor="Y">68536103</Id>
+ <Id HasNeighbor="Y">1621261</Id>
+ <Id HasNeighbor="Y">20807972</Id>
+ </IdCheckList>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_ncheck_corr.xml b/t/data/eutils/elink_ncheck_corr.xml
new file mode 100644
index 0000000..a547961
--- /dev/null
+++ b/t/data/eutils/elink_ncheck_corr.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <Id HasNeighbor="Y">1621261</Id>
+ </IdCheckList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <Id HasNeighbor="Y">68536103</Id>
+ </IdCheckList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <Id HasNeighbor="Y">20807972</Id>
+ </IdCheckList>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdCheckList>
+ <Id HasNeighbor="Y">730439</Id>
+ </IdCheckList>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_neighbor.xml b/t/data/eutils/elink_neighbor.xml
new file mode 100644
index 0000000..545e585
--- /dev/null
+++ b/t/data/eutils/elink_neighbor.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>730439</Id>
+ <Id>68536103</Id>
+ <Id>1621261</Id>
+ <Id>20807972</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <Link>
+ <Id>15968079</Id>
+ </Link>
+ <Link>
+ <Id>12368430</Id>
+ </Link>
+ <Link>
+ <Id>11997336</Id>
+ </Link>
+ <Link>
+ <Id>9634230</Id>
+ </Link>
+ <Link>
+ <Id>8206848</Id>
+ </Link>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed_refseq</LinkName>
+ <Link>
+ <Id>15968079</Id>
+ </Link>
+ <Link>
+ <Id>11997336</Id>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_neighbor_corr.xml b/t/data/eutils/elink_neighbor_corr.xml
new file mode 100644
index 0000000..34167d1
--- /dev/null
+++ b/t/data/eutils/elink_neighbor_corr.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>1621261</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <Link>
+ <Id>12368430</Id>
+ </Link>
+ <Link>
+ <Id>9634230</Id>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>68536103</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <Link>
+ <Id>15968079</Id>
+ </Link>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed_refseq</LinkName>
+ <Link>
+ <Id>15968079</Id>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>20807972</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <Link>
+ <Id>11997336</Id>
+ </Link>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed_refseq</LinkName>
+ <Link>
+ <Id>11997336</Id>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>730439</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <Link>
+ <Id>8206848</Id>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_nhist.xml b/t/data/eutils/elink_nhist.xml
new file mode 100644
index 0000000..93aed0a
--- /dev/null
+++ b/t/data/eutils/elink_nhist.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>730439</Id>
+ <Id>68536103</Id>
+ <Id>1621261</Id>
+ <Id>20807972</Id>
+ </IdList>
+ <LinkSetDbHistory>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <QueryKey>1</QueryKey>
+ </LinkSetDbHistory>
+ <LinkSetDbHistory>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed_refseq</LinkName>
+ <QueryKey>2</QueryKey>
+ </LinkSetDbHistory>
+ <WebEnv>085LBC0s_G5ZenmRAnAm9dgF-TYrzyM9zVawz6_GfunjA5iasUqoGSfSzd@991070AE944054A1_0001SID</WebEnv>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_nhist_corr.xml b/t/data/eutils/elink_nhist_corr.xml
new file mode 100644
index 0000000..dd39c23
--- /dev/null
+++ b/t/data/eutils/elink_nhist_corr.xml
@@ -0,0 +1,62 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>1621261</Id>
+ </IdList>
+ <LinkSetDbHistory>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <QueryKey>1</QueryKey>
+ </LinkSetDbHistory>
+ <WebEnv>0-g5Po62X-zBqwiLv9LDfH6dJvaMByxF-B7jUpwxS73UvKdcD2qdti4CNbY@03F16D1B94400731_0005SID</WebEnv>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>68536103</Id>
+ </IdList>
+ <LinkSetDbHistory>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <QueryKey>2</QueryKey>
+ </LinkSetDbHistory>
+ <LinkSetDbHistory>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed_refseq</LinkName>
+ <QueryKey>2</QueryKey>
+ </LinkSetDbHistory>
+ <WebEnv>0-g5Po62X-zBqwiLv9LDfH6dJvaMByxF-B7jUpwxS73UvKdcD2qdti4CNbY@03F16D1B94400731_0005SID</WebEnv>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>20807972</Id>
+ </IdList>
+ <LinkSetDbHistory>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <QueryKey>3</QueryKey>
+ </LinkSetDbHistory>
+ <LinkSetDbHistory>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed_refseq</LinkName>
+ <QueryKey>3</QueryKey>
+ </LinkSetDbHistory>
+ <WebEnv>0-g5Po62X-zBqwiLv9LDfH6dJvaMByxF-B7jUpwxS73UvKdcD2qdti4CNbY@03F16D1B94400731_0005SID</WebEnv>
+</LinkSet>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>730439</Id>
+ </IdList>
+ <LinkSetDbHistory>
+ <DbTo>pubmed</DbTo>
+ <LinkName>protein_pubmed</LinkName>
+ <QueryKey>4</QueryKey>
+ </LinkSetDbHistory>
+ <WebEnv>0-g5Po62X-zBqwiLv9LDfH6dJvaMByxF-B7jUpwxS73UvKdcD2qdti4CNbY@03F16D1B94400731_0005SID</WebEnv>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/elink_scores.xml b/t/data/eutils/elink_scores.xml
new file mode 100644
index 0000000..868ee81
--- /dev/null
+++ b/t/data/eutils/elink_scores.xml
@@ -0,0 +1,66 @@
+<?xml version="1.0"?>
+<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_020511.dtd">
+<eLinkResult>
+<LinkSet>
+ <DbFrom>protein</DbFrom>
+ <IdList>
+ <Id>15622530</Id>
+ </IdList>
+ <LinkSetDb>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein</LinkName>
+ <Link>
+ <Id>15622530</Id>
+ <Score>2147483647</Score>
+ </Link>
+ <Link>
+ <Id>15921743</Id>
+ <Score>381</Score>
+ </Link>
+ <Link>
+ <Id>70607303</Id>
+ <Score>178</Score>
+ </Link>
+ <Link>
+ <Id>68567951</Id>
+ <Score>178</Score>
+ </Link>
+ <Link>
+ <Id>145702933</Id>
+ <Score>161</Score>
+ </Link>
+ <Link>
+ <Id>146304683</Id>
+ <Score>161</Score>
+ </Link>
+ <Link>
+ <Id>6015889</Id>
+ <Score>142</Score>
+ </Link>
+ <Link>
+ <Id>13813749</Id>
+ <Score>142</Score>
+ </Link>
+ <Link>
+ <Id>15897502</Id>
+ <Score>142</Score>
+ </Link>
+ </LinkSetDb>
+ <LinkSetDb>
+ <DbTo>protein</DbTo>
+ <LinkName>protein_protein_identical</LinkName>
+ <Link>
+ <Id>15622530</Id>
+ <Score>2147483647</Score>
+ </Link>
+ <Link>
+ <Id>74573864</Id>
+ <Score>0</Score>
+ </Link>
+ <Link>
+ <Id>15921743</Id>
+ <Score>0</Score>
+ </Link>
+ </LinkSetDb>
+</LinkSet>
+</eLinkResult>
diff --git a/t/data/eutils/epost.xml b/t/data/eutils/epost.xml
new file mode 100644
index 0000000..b4ceee0
--- /dev/null
+++ b/t/data/eutils/epost.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!DOCTYPE ePostResult PUBLIC "-//NLM//DTD ePostResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/ePost_020511.dtd">
+<ePostResult>
+ <QueryKey>1</QueryKey>
+ <WebEnv>0rACq8_iP87yHkqqm0SBaU38LzWLHIUd-J4QozMr31bh_XO5KAxLr5Q0o2e@03ED1E11941B69F1_0100SID</WebEnv>
+</ePostResult>
diff --git a/t/data/eutils/esearch1.xml b/t/data/eutils/esearch1.xml
new file mode 100644
index 0000000..cc80680
--- /dev/null
+++ b/t/data/eutils/esearch1.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0"?>
+<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
+<eSearchResult>
+ <Count>534</Count>
+ <RetMax>20</RetMax>
+ <RetStart>0</RetStart>
+ <IdList>
+ <Id>6679096</Id>
+ <Id>31543332</Id>
+ <Id>134288853</Id>
+ <Id>483581</Id>
+ <Id>20805941</Id>
+ <Id>187951953</Id>
+ <Id>169158074</Id>
+ <Id>123228044</Id>
+ <Id>148676374</Id>
+ <Id>114326469</Id>
+ <Id>148707003</Id>
+ <Id>187952787</Id>
+ <Id>123233807</Id>
+ <Id>148694865</Id>
+ <Id>148694864</Id>
+ <Id>148694863</Id>
+ <Id>148694861</Id>
+ <Id>148694862</Id>
+ <Id>8705244</Id>
+ <Id>8568086</Id>
+ </IdList>
+ <TranslationSet>
+ <Translation>
+ <From>Mus musculus</From>
+ <To>("Mus musculus"[Organism:__txid10090] OR Mus musculus)</To>
+ </Translation>
+ </TranslationSet>
+ <TranslationStack>
+ <TermSet>
+ <Term>Notch[All Fields]</Term>
+ <Field>All Fields</Field>
+ <Count>4614</Count>
+ <Explode>Y</Explode>
+ </TermSet>
+ <TermSet>
+ <Term>"Mus musculus"[Organism]</Term>
+ <Field>Organism</Field>
+ <Count>250381</Count>
+ <Explode>Y</Explode>
+ </TermSet>
+ <TermSet>
+ <Term>Mus musculus[All Fields]</Term>
+ <Field>All Fields</Field>
+ <Count>256310</Count>
+ <Explode>Y</Explode>
+ </TermSet>
+ <OP>OR</OP>
+ <OP>GROUP</OP>
+ <OP>AND</OP>
+ </TranslationStack>
+ <QueryTranslation>Notch[All Fields] AND ("Mus musculus"[Organism] OR Mus musculus[All Fields])</QueryTranslation>
+</eSearchResult>
diff --git a/t/data/eutils/esearch2.xml b/t/data/eutils/esearch2.xml
new file mode 100644
index 0000000..1977ee3
--- /dev/null
+++ b/t/data/eutils/esearch2.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+<!DOCTYPE eSearchResult PUBLIC "-//NLM//DTD eSearchResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSearch_020511.dtd">
+<eSearchResult>
+ <Count>534</Count>
+ <RetMax>20</RetMax>
+ <RetStart>0</RetStart>
+ <QueryKey>1</QueryKey>
+ <WebEnv>00m7eJh8lyG3wiC2SE2hd7Im_w5o3z3q4_JK9-Rn266ix_eRXkjNOYQxHp@03F17619941CFD71_0005SID</WebEnv>
+ <IdList>
+ <Id>6679096</Id>
+ <Id>31543332</Id>
+ <Id>134288853</Id>
+ <Id>483581</Id>
+ <Id>20805941</Id>
+ <Id>187951953</Id>
+ <Id>169158074</Id>
+ <Id>123228044</Id>
+ <Id>148676374</Id>
+ <Id>114326469</Id>
+ <Id>148707003</Id>
+ <Id>187952787</Id>
+ <Id>123233807</Id>
+ <Id>148694865</Id>
+ <Id>148694864</Id>
+ <Id>148694863</Id>
+ <Id>148694861</Id>
+ <Id>148694862</Id>
+ <Id>8705244</Id>
+ <Id>8568086</Id>
+ </IdList>
+ <TranslationSet>
+ <Translation>
+ <From>Mus musculus</From>
+ <To>("Mus musculus"[Organism:__txid10090] OR Mus musculus)</To>
+ </Translation>
+ </TranslationSet>
+ <TranslationStack>
+ <TermSet>
+ <Term>Notch[All Fields]</Term>
+ <Field>All Fields</Field>
+ <Count>4614</Count>
+ <Explode>Y</Explode>
+ </TermSet>
+ <TermSet>
+ <Term>"Mus musculus"[Organism]</Term>
+ <Field>Organism</Field>
+ <Count>250381</Count>
+ <Explode>Y</Explode>
+ </TermSet>
+ <TermSet>
+ <Term>Mus musculus[All Fields]</Term>
+ <Field>All Fields</Field>
+ <Count>256310</Count>
+ <Explode>Y</Explode>
+ </TermSet>
+ <OP>OR</OP>
+ <OP>GROUP</OP>
+ <OP>AND</OP>
+ </TranslationStack>
+ <QueryTranslation>Notch[All Fields] AND ("Mus musculus"[Organism] OR Mus musculus[All Fields])</QueryTranslation>
+</eSearchResult>
diff --git a/t/data/eutils/espell.xml b/t/data/eutils/espell.xml
new file mode 100644
index 0000000..e6b611c
--- /dev/null
+++ b/t/data/eutils/espell.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<!DOCTYPE eSpellResult PUBLIC "-//NLM//DTD eSpellResult, 23 November 2004//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSpell.dtd">
+<eSpellResult>
+ <Database>pubmed</Database>
+ <Query>Netch AND Mus musclus</Query>
+ <CorrectedQuery>notch AND mus musculus</CorrectedQuery>
+ <SpelledQuery><Replaced>notch</Replaced><Original> AND mus </Original><Replaced>musculus</Replaced></SpelledQuery>
+ <ERROR/>
+</eSpellResult>
diff --git a/t/data/eutils/esummary1.xml b/t/data/eutils/esummary1.xml
new file mode 100644
index 0000000..034871f
--- /dev/null
+++ b/t/data/eutils/esummary1.xml
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE eSummaryResult PUBLIC "-//NLM//DTD eSummaryResult, 11 May 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSummary_041029.dtd">
+<eSummaryResult>
+
+<DocSum>
+<Id>1621261</Id>
+<Item Name="Caption" Type="String">CAB02640</Item>
+<Item Name="Title" Type="String">PROBABLE PYRIMIDINE OPERON REGULATORY PROTEIN PYRR [Mycobacterium tuberculosis H37Rv]</Item>
+<Item Name="Extra" Type="String">gi|1621261|emb|CAB02640.1|[1621261]</Item>
+<Item Name="Gi" Type="Integer">1621261</Item>
+<Item Name="CreateDate" Type="String">2003/11/21</Item>
+<Item Name="UpdateDate" Type="String">2008/10/23</Item>
+<Item Name="Flags" Type="Integer">0</Item>
+<Item Name="TaxId" Type="Integer">83332</Item>
+<Item Name="Length" Type="Integer">193</Item>
+<Item Name="Status" Type="String">live</Item>
+<Item Name="ReplacedBy" Type="String"></Item>
+<Item Name="Comment" Type="String"><![CDATA[ ]]></Item>
+</DocSum>
+
+
+<DocSum>
+<Id>89318838</Id>
+<Item Name="Caption" Type="String">EAS10332</Item>
+<Item Name="Title" Type="String"></Item>
+<Item Name="Extra" Type="String">gi|89318838|gb|EAS10332.1|[89318838]</Item>
+<Item Name="Gi" Type="Integer">89318838</Item>
+<Item Name="CreateDate" Type="String"></Item>
+<Item Name="UpdateDate" Type="String"></Item>
+<Item Name="Flags" Type="Integer">0</Item>
+<Item Name="TaxId" Type="Integer">0</Item>
+<Item Name="Length" Type="Integer">0</Item>
+<Item Name="Status" Type="String">suppressed</Item>
+<Item Name="ReplacedBy" Type="String"></Item>
+<Item Name="Comment" Type="String"><![CDATA[ This WGS project has been superceded by the complete genome in CP000656-CP000659 ]]></Item>
+</DocSum>
+
+
+<DocSum>
+<Id>68536103</Id>
+<Item Name="Caption" Type="String">YP_250808</Item>
+<Item Name="Title" Type="String">pyrimidine regulatory protein PyrR [Corynebacterium jeikeium K411]</Item>
+<Item Name="Extra" Type="String">gi|68536103|ref|YP_250808.1|[68536103]</Item>
+<Item Name="Gi" Type="Integer">68536103</Item>
+<Item Name="CreateDate" Type="String">2005/07/04</Item>
+<Item Name="UpdateDate" Type="String">2008/07/24</Item>
+<Item Name="Flags" Type="Integer">512</Item>
+<Item Name="TaxId" Type="Integer">306537</Item>
+<Item Name="Length" Type="Integer">195</Item>
+<Item Name="Status" Type="String">live</Item>
+<Item Name="ReplacedBy" Type="String"></Item>
+<Item Name="Comment" Type="String"><![CDATA[ ]]></Item>
+</DocSum>
+
+
+<DocSum>
+<Id>20807972</Id>
+<Item Name="Caption" Type="String">NP_623143</Item>
+<Item Name="Title" Type="String">pyrimidine regulatory protein PyrR [Thermoanaerobacter tengcongensis MB4]</Item>
+<Item Name="Extra" Type="String">gi|20807972|ref|NP_623143.1|[20807972]</Item>
+<Item Name="Gi" Type="Integer">20807972</Item>
+<Item Name="CreateDate" Type="String">2002/05/09</Item>
+<Item Name="UpdateDate" Type="String">2008/07/23</Item>
+<Item Name="Flags" Type="Integer">512</Item>
+<Item Name="TaxId" Type="Integer">273068</Item>
+<Item Name="Length" Type="Integer">178</Item>
+<Item Name="Status" Type="String">live</Item>
+<Item Name="ReplacedBy" Type="String"></Item>
+<Item Name="Comment" Type="String"><![CDATA[ ]]></Item>
+</DocSum>
+
+
+<DocSum>
+<Id>730439</Id>
+<Item Name="Caption" Type="String">P41007</Item>
+<Item Name="Title" Type="String">RecName: Full=Bifunctional protein pyrR; Includes: RecName: Full=Pyrimidine operon regulatory protein; Includes: RecName: Full=Uracil phosphoribosyltransferase; Short=UPRTase</Item>
+<Item Name="Extra" Type="String">gi|730439|sp|P41007.1|PYRR_BACCL[730439]</Item>
+<Item Name="Gi" Type="Integer">730439</Item>
+<Item Name="CreateDate" Type="String">1995/02/01</Item>
+<Item Name="UpdateDate" Type="String">2008/11/25</Item>
+<Item Name="Flags" Type="Integer">0</Item>
+<Item Name="TaxId" Type="Integer">1394</Item>
+<Item Name="Length" Type="Integer">179</Item>
+<Item Name="Status" Type="String">live</Item>
+<Item Name="ReplacedBy" Type="String"></Item>
+<Item Name="Comment" Type="String"><![CDATA[ ]]></Item>
+</DocSum>
+
+</eSummaryResult>
diff --git a/t/data/eutils/esummary2.xml b/t/data/eutils/esummary2.xml
new file mode 100644
index 0000000..1cd1064
--- /dev/null
+++ b/t/data/eutils/esummary2.xml
@@ -0,0 +1,130 @@
+<?xml version="1.0"?>
+<!DOCTYPE eSummaryResult PUBLIC "-//NLM//DTD eSummaryResult, 29 October 2004//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eSummary_041029.dtd">
+<eSummaryResult>
+<DocSum>
+ <Id>32049</Id>
+ <Item Name="HomoloGeneDataList" Type="List">
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Homo sapiens</Item>
+ <Item Name="TaxId" Type="Integer">9606</Item>
+ <Item Name="Symbol" Type="String">NOTCH1</Item>
+ <Item Name="Title" Type="String">Notch homolog 1, translocation-associated (Drosophila)</Item>
+ <Item Name="GeneID" Type="Integer">4851</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Pan troglodytes</Item>
+ <Item Name="TaxId" Type="Integer">9598</Item>
+ <Item Name="Symbol" Type="String">NOTCH1</Item>
+ <Item Name="Title" Type="String">Notch homolog 1, translocation-associated (Drosophila)</Item>
+ <Item Name="GeneID" Type="Integer">464865</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Canis lupus familiaris</Item>
+ <Item Name="TaxId" Type="Integer">9615</Item>
+ <Item Name="Symbol" Type="String">NOTCH1</Item>
+ <Item Name="Title" Type="String">Notch homolog 1, translocation-associated (Drosophila)</Item>
+ <Item Name="GeneID" Type="Integer">480676</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Bos taurus</Item>
+ <Item Name="TaxId" Type="Integer">9913</Item>
+ <Item Name="Symbol" Type="String">NOTCH1</Item>
+ <Item Name="Title" Type="String">Notch homolog 1, translocation-associated (Drosophila)</Item>
+ <Item Name="GeneID" Type="Integer">767866</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Mus musculus</Item>
+ <Item Name="TaxId" Type="Integer">10090</Item>
+ <Item Name="Symbol" Type="String">Notch1</Item>
+ <Item Name="Title" Type="String">Notch gene homolog 1 (Drosophila)</Item>
+ <Item Name="GeneID" Type="Integer">18128</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Rattus norvegicus</Item>
+ <Item Name="TaxId" Type="Integer">10116</Item>
+ <Item Name="Symbol" Type="String">Notch1</Item>
+ <Item Name="Title" Type="String">Notch homolog 1, translocation-associated (Drosophila)</Item>
+ <Item Name="GeneID" Type="Integer">25496</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Gallus gallus</Item>
+ <Item Name="TaxId" Type="Integer">9031</Item>
+ <Item Name="Symbol" Type="String">NOTCH1</Item>
+ <Item Name="Title" Type="String">Notch homolog 1, translocation-associated (Drosophila)</Item>
+ <Item Name="GeneID" Type="Integer">395655</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Danio rerio</Item>
+ <Item Name="TaxId" Type="Integer">7955</Item>
+ <Item Name="Symbol" Type="String">notch1b</Item>
+ <Item Name="Title" Type="String">notch homolog 1b</Item>
+ <Item Name="GeneID" Type="Integer">794892</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Drosophila melanogaster</Item>
+ <Item Name="TaxId" Type="Integer">7227</Item>
+ <Item Name="Symbol" Type="String">N</Item>
+ <Item Name="Title" Type="String">Notch</Item>
+ <Item Name="GeneID" Type="Integer">31293</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Anopheles gambiae</Item>
+ <Item Name="TaxId" Type="Integer">7165</Item>
+ <Item Name="Symbol" Type="String">AgaP_AGAP001015</Item>
+ <Item Name="Title" Type="String">AGAP001015-PA</Item>
+ <Item Name="GeneID" Type="Integer">1270506</Item>
+ </Item>
+ </Item>
+ <Item Name="Caption" Type="String">Gene conserved in Coelomata</Item>
+</DocSum>
+
+<DocSum>
+ <Id>45614</Id>
+ <Item Name="HomoloGeneDataList" Type="List">
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Homo sapiens</Item>
+ <Item Name="TaxId" Type="Integer">9606</Item>
+ <Item Name="Symbol" Type="String">hCG_1745121</Item>
+ <Item Name="Title" Type="String">notch1-induced protein</Item>
+ <Item Name="GeneID" Type="Integer">729920</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Bos taurus</Item>
+ <Item Name="TaxId" Type="Integer">9913</Item>
+ <Item Name="Symbol" Type="String">LOC617841</Item>
+ <Item Name="Title" Type="String">similar to notch1-induced protein</Item>
+ <Item Name="GeneID" Type="Integer">617841</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Mus musculus</Item>
+ <Item Name="TaxId" Type="Integer">10090</Item>
+ <Item Name="Symbol" Type="String">4930579E17Rik</Item>
+ <Item Name="Title" Type="String">RIKEN cDNA 4930579E17 gene</Item>
+ <Item Name="GeneID" Type="Integer">75847</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Rattus norvegicus</Item>
+ <Item Name="TaxId" Type="Integer">10116</Item>
+ <Item Name="Symbol" Type="String">LOC493574</Item>
+ <Item Name="Title" Type="String">notch1-induced protein</Item>
+ <Item Name="GeneID" Type="Integer">493574</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Gallus gallus</Item>
+ <Item Name="TaxId" Type="Integer">9031</Item>
+ <Item Name="Symbol" Type="String">LOC420591</Item>
+ <Item Name="Title" Type="String">hypothetical LOC420591</Item>
+ <Item Name="GeneID" Type="Integer">420591</Item>
+ </Item>
+ <Item Name="HomoloGeneData" Type="Structure">
+ <Item Name="TaxName" Type="String">Danio rerio</Item>
+ <Item Name="TaxId" Type="Integer">7955</Item>
+ <Item Name="Symbol" Type="String">zgc:154151</Item>
+ <Item Name="Title" Type="String">zgc:154151</Item>
+ <Item Name="GeneID" Type="Integer">798716</Item>
+ </Item>
+ </Item>
+ <Item Name="Caption" Type="String">Gene conserved in Euteleostomi</Item>
+</DocSum>
+
+</eSummaryResult>
diff --git a/t/egquery.t b/t/egquery.t
new file mode 100644
index 0000000..8bc5fa7
--- /dev/null
+++ b/t/egquery.t
@@ -0,0 +1,40 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: egquery.t 15112 2008-12-08 18:12:38Z sendu $
+#
+
+use strict;
+use warnings;
+
+use Test::More tests => 18;
+use inc::TestHelper qw(test_input_file);
+
+use Bio::Tools::EUtilities;
+
+# Normal egquery
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'egquery',
+ -file => test_input_file('eutils','egquery.xml'));
+
+is($eutil->get_db, 'pubmed', 'get_db');
+is($eutil->get_database, 'pubmed', 'get_database');
+is(scalar($eutil->get_databases), 35, 'get_databases');
+is($eutil->get_term, 'Notch AND Mus musculus','get_term');
+
+## everything else undef or 0
+is ($eutil->get_count('pubmed'), 1803, 'get_count');
+is ($eutil->get_count('protein'), 534, 'get_count');
+is ($eutil->get_count('cdd'), 0, 'get_count');
+
+my @qs = $eutil->get_GlobalQueries;
+is(scalar(@qs), 35, 'get_GlobalQueries');
+is($qs[2]->get_term, 'Notch AND Mus musculus', 'get_term');
+is($qs[2]->get_database, 'journals', 'get_term');
+is($qs[2]->get_count, 0, 'get_term');
+is($qs[2]->get_status, 'Term or Database is not found', 'get_term');
+is($qs[2]->get_menu_name, 'Journals', 'get_term');
+
+is($qs[20]->get_term, 'Notch AND Mus musculus', 'get_term');
+is($qs[20]->get_database, 'unists', 'get_term');
+is($qs[20]->get_count, 61, 'get_term');
+is($qs[20]->get_status, 'Ok', 'get_term');
+is($qs[20]->get_menu_name, 'UniSTS', 'get_term');
diff --git a/t/einfo.t b/t/einfo.t
new file mode 100644
index 0000000..a86c385
--- /dev/null
+++ b/t/einfo.t
@@ -0,0 +1,85 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: einfo.t 15112 2008-12-08 18:12:38Z sendu $
+#
+
+use strict;
+use warnings;
+
+use Test::More tests => 49;
+use inc::TestHelper qw(test_input_file);
+
+use Bio::Tools::EUtilities;
+
+## einfo (no dbs)
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'einfo',
+ -file => test_input_file('eutils','einfo_dbs.xml'));
+
+is(scalar($eutil->get_available_databases), 37, 'get_available_databases');
+is(scalar($eutil->get_databases), 37, 'get_databases');
+is($eutil->get_db, 'pubmed', 'get_db');
+
+# no data present for these
+is($eutil->get_record_count, undef, 'get_record_count');
+is($eutil->get_menu_name, undef, 'get_menu_name');
+is($eutil->get_last_update, undef, 'get_last_update');
+is($eutil->get_description, undef, 'get_description');
+
+my @fields = $eutil->get_FieldInfo;
+is(scalar(@fields), 0, 'FieldInfo');
+my @linkinfo = $eutil->get_LinkInfo;
+is(scalar(@linkinfo), 0, 'LinkInfo');
+
+# einfo (db-specific)
+$eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'einfo',
+ -file => test_input_file('eutils','einfo.xml'));
+
+is($eutil->get_db, 'pubmed', 'get_db');
+is(scalar($eutil->get_dbs), 1, 'get_dbs');
+is($eutil->get_record_count, 18525976, 'get_record_count');
+is($eutil->get_menu_name, 'PubMed', 'get_menu_name');
+is($eutil->get_last_update, '2008/12/11 02:33', 'get_last_update');
+is($eutil->get_description, 'PubMed bibliographic record', 'get_description');
+
+@fields = $eutil->get_FieldInfo;
+is(scalar(@fields), 41, 'FieldInfo');
+# test two
+is($fields[1]->get_term_count, 0, 'get_term_count');
+is($fields[1]->get_field_name, 'UID', 'get_field_name');
+is($fields[1]->get_field_code, 'UID', 'get_field_code');
+is($fields[1]->get_field_description, 'Unique number assigned to publication', 'get_field_description');
+is($fields[1]->is_date, 0, 'is_date');
+is($fields[1]->is_singletoken, 1, 'is_singletoken');
+is($fields[1]->is_hierarchy, 0, 'is_hierarchy');
+is($fields[1]->is_hidden, 1, 'is_hidden');
+is($fields[1]->is_numerical, 1, 'is_numerical');
+
+is($fields[19]->get_term_count, 83, 'get_term_count');
+is($fields[19]->get_field_name, 'MeSH Subheading', 'get_field_name');
+is($fields[19]->get_field_code, 'SUBH', 'get_field_code');
+is($fields[19]->get_field_description, 'Additional specificity for MeSH term', 'get_field_description');
+is($fields[19]->is_date, 0, 'is_date');
+is($fields[19]->is_singletoken, 1, 'is_singletoken');
+is($fields[19]->is_hierarchy, 0, 'is_hierarchy');
+is($fields[19]->is_hidden, 0, 'is_hidden');
+is($fields[19]->is_numerical, 0, 'is_numerical');
+
+@linkinfo = $eutil->get_LinkInfo;
+is(scalar(@linkinfo), 46, 'LinkInfo');
+# test two
+is($linkinfo[1]->get_dbto, 'cancerchromosomes', 'get_dbto');
+is($linkinfo[1]->get_dbfrom, 'pubmed', 'get_dbfrom');
+is($linkinfo[1]->get_link_name, 'pubmed_cancerchromosomes', 'get_link_name');
+is($linkinfo[1]->get_link_description, 'Related Cancer Chromosomes', 'get_link_description');
+is($linkinfo[1]->get_priority, undef, 'get_priority');
+is($linkinfo[1]->get_html_tag, undef, 'get_html_tag');
+is($linkinfo[1]->get_url, undef, 'get_url');
+
+is($linkinfo[12]->get_dbto, 'geo', 'get_dbto');
+is($linkinfo[12]->get_dbfrom, 'pubmed', 'get_dbfrom');
+is($linkinfo[12]->get_link_name, 'pubmed_geo', 'get_link_name');
+is($linkinfo[12]->get_link_description, 'GEO records associated with pubmed record', 'get_link_description');
+is($linkinfo[12]->get_priority, undef, 'get_priority');
+is($linkinfo[12]->get_html_tag, undef, 'get_html_tag');
+is($linkinfo[12]->get_url, undef, 'get_url');
diff --git a/t/elink_acheck.t b/t/elink_acheck.t
new file mode 100644
index 0000000..2f4e8a0
--- /dev/null
+++ b/t/elink_acheck.t
@@ -0,0 +1,215 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: epost.t 15112 2008-12-08 18:12:38Z sendu $
+
+use strict;
+use warnings;
+
+use Test::More tests => 128;
+
+use Bio::Tools::EUtilities;
+use inc::TestHelper qw(test_input_file);
+
+# check -correspondence => 0 (default; this is set up to return the
+# exact same thing as correspondence = 1, tested below)
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_acheck.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+is(join(',',$eutil->get_databases), 'LinkOut,cdd,gene,genome,genomeprj,'.
+ 'nuccore,pmc,protein,proteinclusters,pubmed,structure,taxonomy');
+
+# for elinks, IDs are globbed together when called from the parser unless a database is specified
+is(join(',',$eutil->get_ids('cdd')), '730439,68536103,1621261,20807972', 'get_ids');
+is(join(',',$eutil->get_ids('LinkOut')), '730439,1621261,20807972', 'get_ids');
+my @ls = $eutil->get_LinkSets;
+is(scalar(@ls), 4, 'uncorrelated LinkSets lump everything together');
+is(join(',',$ls[1]->get_databases), 'cdd,gene,genome,genomeprj,nuccore,pmc,'.
+ 'protein,proteinclusters,pubmed,structure,taxonomy');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+
+# check data in LinkSets
+is(join(',',$ls[0]->get_ids), '730439');
+is(join(',',$ls[0]->get_databases), 'LinkOut,cdd,pmc,protein,pubmed,structure,'.
+ 'taxonomy');
+is(join(',',$ls[0]->get_submitted_ids), '730439');
+is($ls[0]->get_dbfrom, 'protein');
+is(join(',',$ls[0]->get_link_names), 'protein_cdd,protein_cdd_concise_2,'.
+ 'protein_cdd_summary,protein_pmc,protein_protein,'.
+ 'protein_protein_cdart_summary,protein_protein_identical,protein_pubmed,'.
+ 'protein_structure_related,protein_taxonomy,ExternalLink');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 1);
+is($ls[0]->has_neighbor, 0);
+my @info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 11);
+is($info[1]->get_database, 'cdd');
+is($info[1]->get_dbfrom, 'protein');
+is($info[1]->get_link_name, 'protein_cdd_concise_2');
+is($info[1]->get_link_description, undef);
+is($info[1]->get_link_menu_name, 'Concise Conserved Domain Links');
+is($info[1]->get_priority, 128);
+is($info[1]->get_html_tag, undef);
+is($info[1]->get_url, undef);
+
+is($info[10]->get_database, 'LinkOut');
+is($info[10]->get_dbfrom, 'protein');
+is($info[10]->get_link_name, 'ExternalLink');
+is($info[10]->get_link_description, undef);
+is($info[10]->get_link_menu_name, 'LinkOut');
+is($info[10]->get_priority, 255);
+is($info[10]->get_html_tag, 'LinkOut');
+is($info[10]->get_url, undef);
+
+# no UrlLinks
+my @urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[0]->get_webenv, undef);
+is($ls[0]->get_query_key, undef);
+
+is(join(',',$ls[1]->get_ids), '68536103');
+is(join(',',$ls[1]->get_databases), 'cdd,gene,genome,genomeprj,nuccore,pmc,'.
+ 'protein,proteinclusters,pubmed,structure,taxonomy');
+is(join(',',$ls[1]->get_submitted_ids), '68536103');
+is(join(',',$ls[1]->get_link_names), 'protein_cdd,protein_cdd_concise_2,'.
+ 'protein_cdd_summary,protein_gene,protein_genome,protein_genomeprj,'.
+ 'protein_nuccore,protein_pmc,protein_protein,protein_protein_cdart_summary,'.
+ 'protein_protein_identical,protein_proteinclusters,protein_pubmed,'.
+ 'protein_pubmed_refseq,protein_structure_related,protein_taxonomy');
+is($ls[1]->get_dbfrom, 'protein');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 0);
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 16);
+is($info[1]->get_database, 'cdd');
+is($info[1]->get_dbfrom, 'protein');
+is($info[1]->get_link_name, 'protein_cdd_concise_2');
+is($info[1]->get_link_description, undef);
+is($info[1]->get_link_menu_name, 'Concise Conserved Domain Links');
+is($info[1]->get_priority, 128);
+is($info[1]->get_html_tag, undef);
+is($info[1]->get_url, undef);
+is($info[14]->get_database, 'structure');
+is($info[14]->get_dbfrom, 'protein');
+is($info[14]->get_link_name, 'protein_structure_related');
+is($info[14]->get_link_description, undef);
+is($info[14]->get_link_menu_name, undef);
+is($info[14]->get_priority, 128);
+is($info[14]->get_html_tag, 'Related Structure');
+# Note the UID tag at end
+is($info[14]->get_url, 'http://structure.ncbi.nlm.nih.gov/Structure/cblast/'.
+ 'cblast.cgi?client=entrez&query_gi=<@UID@>');
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[1]->get_webenv, undef);
+is($ls[1]->get_query_key, undef);
+
+# check -correspondence => 1
+$eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_acheck.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+is(join(',',$eutil->get_databases), 'LinkOut,cdd,gene,genome,genomeprj,'.
+ 'nuccore,pmc,protein,proteinclusters,pubmed,structure,taxonomy');
+
+# for elinks, IDs are globbed together when called from the parser unless a database is specified
+is(join(',',$eutil->get_ids('cdd')), '730439,68536103,1621261,20807972', 'get_ids');
+is(join(',',$eutil->get_ids('LinkOut')), '730439,1621261,20807972', 'get_ids');
+@ls = $eutil->get_LinkSets;
+is(scalar(@ls), 4, 'correlated LinkSets separate ID data');
+is(join(',',$ls[1]->get_databases), 'cdd,gene,genome,genomeprj,nuccore,pmc,'.
+ 'protein,proteinclusters,pubmed,structure,taxonomy');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+
+# check data in LinkSets
+is(join(',',$ls[0]->get_ids), '730439');
+is(join(',',$ls[0]->get_databases), 'LinkOut,cdd,pmc,protein,pubmed,structure,'.
+ 'taxonomy');
+is(join(',',$ls[0]->get_submitted_ids), '730439');
+is(join(',',$ls[0]->get_link_names), 'protein_cdd,protein_cdd_concise_2,'.
+ 'protein_cdd_summary,protein_pmc,protein_protein,'.
+ 'protein_protein_cdart_summary,protein_protein_identical,'.
+ 'protein_pubmed,protein_structure_related,protein_taxonomy,ExternalLink');
+is($ls[0]->get_dbfrom, 'protein');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 1);
+is($ls[0]->has_neighbor, 0);
+@info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 11);
+is($info[1]->get_database, 'cdd');
+is($info[1]->get_dbfrom, 'protein');
+is($info[1]->get_link_name, 'protein_cdd_concise_2');
+is($info[1]->get_link_description, undef);
+is($info[1]->get_link_menu_name, 'Concise Conserved Domain Links');
+is($info[1]->get_priority, 128);
+is($info[1]->get_html_tag, undef);
+is($info[1]->get_url, undef);
+
+is($info[10]->get_database, 'LinkOut');
+is($info[10]->get_dbfrom, 'protein');
+is($info[10]->get_link_name, 'ExternalLink');
+is($info[10]->get_link_description, undef);
+is($info[10]->get_link_menu_name, 'LinkOut');
+is($info[10]->get_priority, 255);
+is($info[10]->get_html_tag, 'LinkOut');
+is($info[10]->get_url, undef);
+
+# no UrlLinks
+@urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[0]->get_webenv, undef);
+is($ls[0]->get_query_key, undef);
+
+is(join(',',$ls[1]->get_ids), '68536103');
+is(join(',',$ls[1]->get_databases), 'cdd,gene,genome,genomeprj,nuccore,pmc,'.
+ 'protein,proteinclusters,pubmed,structure,taxonomy');
+is(join(',',$ls[1]->get_submitted_ids), '68536103');
+is($ls[1]->get_dbfrom, 'protein');
+is(join(',',$ls[1]->get_link_names), 'protein_cdd,protein_cdd_concise_2,'.
+ 'protein_cdd_summary,protein_gene,protein_genome,protein_genomeprj,'.
+ 'protein_nuccore,protein_pmc,protein_protein,protein_protein_cdart_summary,'.
+ 'protein_protein_identical,protein_proteinclusters,protein_pubmed,'.
+ 'protein_pubmed_refseq,protein_structure_related,protein_taxonomy');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 0);
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 16);
+is($info[1]->get_database, 'cdd');
+is($info[1]->get_dbfrom, 'protein');
+is($info[1]->get_link_name, 'protein_cdd_concise_2');
+is($info[1]->get_link_description, undef);
+is($info[1]->get_link_menu_name, 'Concise Conserved Domain Links');
+is($info[1]->get_priority, 128);
+is($info[1]->get_html_tag, undef);
+is($info[1]->get_url, undef);
+is($info[14]->get_database, 'structure');
+is($info[14]->get_dbfrom, 'protein');
+is($info[14]->get_link_name, 'protein_structure_related');
+is($info[14]->get_link_description, undef);
+is($info[14]->get_link_menu_name, undef);
+is($info[14]->get_priority, 128);
+is($info[14]->get_html_tag, 'Related Structure');
+# Note the UID tag at end
+is($info[14]->get_url, 'http://structure.ncbi.nlm.nih.gov/Structure/cblast/'.
+ 'cblast.cgi?client=entrez&query_gi=<@UID@>');
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[1]->get_webenv, undef);
+is($ls[1]->get_query_key, undef);
diff --git a/t/elink_lcheck.t b/t/elink_lcheck.t
new file mode 100644
index 0000000..4589646
--- /dev/null
+++ b/t/elink_lcheck.t
@@ -0,0 +1,132 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: epost.t 15112 2008-12-08 18:12:38Z sendu $
+
+use strict;
+use warnings;
+use Test::More tests => 62;
+
+use Bio::Tools::EUtilities;
+use inc::TestHelper qw(test_input_file);
+
+# check -correspondence => 0 (default; this is set up to return the
+# exact same thing as correspondence = 1, tested below)
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_lcheck.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+
+# for lcheck, db are not returned (check is for external link in, not out)
+is(join(',',$eutil->get_databases), '');
+
+# for elinks, IDs are globbed together when called from the parser
+# unless a database is specified. Since no database is specified, all
+# ids are lumped together regardless
+is(join(',',$eutil->get_ids), '730439,68536103,1621261,20807972', 'get_ids');
+my @ls = $eutil->get_LinkSets;
+is(scalar(@ls), 4, 'uncorrelated LinkSets lump everything together');
+is(join(',',$ls[1]->get_databases), '');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+
+# check data in LinkSets
+is(join(',',$ls[0]->get_ids), '730439');
+is(join(',',$ls[0]->get_databases), '');
+is(join(',',$ls[0]->get_submitted_ids), '730439');
+is($ls[0]->get_dbfrom, 'protein');
+is(join(',',$ls[0]->get_link_names), '');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 1);
+is($ls[0]->has_neighbor, 0);
+
+# no LinkInfo
+my @info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+my @urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[0]->get_webenv, undef);
+is($ls[0]->get_query_key, undef);
+
+is(join(',',$ls[1]->get_ids), '68536103');
+is(join(',',$ls[1]->get_databases), '');
+is(join(',',$ls[1]->get_submitted_ids), '68536103');
+is(join(',',$ls[1]->get_link_names), '');
+is($ls[1]->get_dbfrom, 'protein');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 0);
+
+# no LinkInfo
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[1]->get_webenv, undef);
+is($ls[1]->get_query_key, undef);
+
+# check -correspondence => 1
+$eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_lcheck_corr.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+is(join(',',$eutil->get_databases), '');
+
+# for elinks, IDs are globbed together when called from the parser unless a database is specified
+is(join(',',$eutil->get_ids), '1621261,68536103,20807972,730439', 'get_ids');
+@ls = $eutil->get_LinkSets;
+is(scalar(@ls), 4, 'correlated LinkSets separate ID data');
+is(join(',',$ls[1]->get_databases), '');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+
+# check data in LinkSets
+is(join(',',$ls[0]->get_ids), '1621261');
+is(join(',',$ls[0]->get_databases), '');
+is(join(',',$ls[0]->get_submitted_ids), '1621261');
+is(join(',',$ls[0]->get_link_names), '');
+is($ls[0]->get_dbfrom, 'protein');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 1);
+is($ls[0]->has_neighbor, 0);
+
+# no LinkInfo
+@info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[0]->get_webenv, undef);
+is($ls[0]->get_query_key, undef);
+
+is(join(',',$ls[1]->get_ids), '68536103');
+is(join(',',$ls[1]->get_databases), '');
+is(join(',',$ls[1]->get_submitted_ids), '68536103');
+is($ls[1]->get_dbfrom, 'protein');
+is(join(',',$ls[1]->get_link_names), '');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 0);
+
+# no LinkInfo
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[1]->get_webenv, undef);
+is($ls[1]->get_query_key, undef);
diff --git a/t/elink_llinks.t b/t/elink_llinks.t
new file mode 100644
index 0000000..4a9b701
--- /dev/null
+++ b/t/elink_llinks.t
@@ -0,0 +1,154 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: elink_llinks.t 15112 2008-12-08 18:12:38Z sendu $
+
+use strict;
+use warnings;
+use Test::More tests => 84;
+
+use Bio::Tools::EUtilities;
+use inc::TestHelper qw(test_input_file);
+
+# check -correspondence => 0 (default; this is set up to return the
+# exact same thing as correspondence = 1, tested below)
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_llinks.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+is(join(',',$eutil->get_databases), '');
+
+# for elinks, IDs are globbed together when called from the parser unless a database is specified
+is(join(',',$eutil->get_ids), '730439,68536103,1621261,20807972', 'get_ids');
+my @ls = $eutil->get_LinkSets;
+is(scalar(@ls), 4, 'uncorrelated LinkSets lump everything together');
+is(join(',',$ls[1]->get_databases), '');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+
+# check data in LinkSets
+is(join(',',$ls[0]->get_ids), '730439');
+is(join(',',$ls[0]->get_databases), '');
+is(join(',',$ls[0]->get_submitted_ids), '730439');
+is($ls[0]->get_dbfrom, 'protein');
+is(join(',',$ls[0]->get_link_names), '');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 0);
+is($ls[0]->has_neighbor, 0);
+
+# HistoryI
+is($ls[0]->get_webenv, undef);
+is($ls[0]->get_query_key, undef);
+
+# no LinkInfo
+my @info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# has UrlLink!
+my @urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 1);
+
+is($urls[0]->get_dbfrom, 'protein');
+is($urls[0]->get_attribute, 'registration required');
+is($urls[0]->get_icon_url, 'http://www.ncbi.nlm.nih.gov/entrez/query/egifs/http:--salilab.org-modbase-img-modbase.jpg');
+is($urls[0]->get_subject_type, 'structure');
+is($urls[0]->get_url, 'http://salilab.org/modbase/search?databaseID=730439');
+is($urls[0]->get_link_name, undef);
+is($urls[0]->get_provider_name, 'MODBASE, Database of Comparative Protein Structure Models (Sali Lab/UCSF)');
+is($urls[0]->get_provider_abbr, 'modbase');
+is($urls[0]->get_provider_id, 5447);
+is($urls[0]->get_provider_url, 'http://salilab.org/modbase');
+is($urls[0]->get_provider_icon_url, 'http://salilab.org/modbase/img/modbase.jpg');
+
+# Another LinkSet...
+is(join(',',$ls[1]->get_ids), '68536103');
+is(join(',',$ls[1]->get_databases), '');
+is(join(',',$ls[1]->get_submitted_ids), '68536103');
+is(join(',',$ls[1]->get_link_names), '');
+is($ls[1]->get_dbfrom, 'protein');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 0);
+
+# no LinkInfo
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[1]->get_webenv, undef);
+is($ls[1]->get_query_key, undef);
+
+# check -correspondence => 1
+$eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_llinks_corr.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+is(join(',',$eutil->get_databases), '');
+
+# for elinks, IDs are globbed together when called from the parser unless a database is specified
+is(join(',',$eutil->get_ids), '1621261,68536103,20807972,730439', 'get_ids');
+@ls = $eutil->get_LinkSets;
+is(scalar(@ls), 4, 'correlated LinkSets separate ID data');
+is(join(',',$ls[1]->get_databases), '');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+
+# check data in LinkSets
+is(join(',',$ls[0]->get_ids), '1621261');
+is(join(',',$ls[0]->get_databases), '');
+is(join(',',$ls[0]->get_submitted_ids), '1621261');
+is(join(',',$ls[0]->get_link_names), '');
+is($ls[0]->get_dbfrom, 'protein');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 0);
+is($ls[0]->has_neighbor, 0);
+
+# HistoryI
+is($ls[0]->get_webenv, undef);
+is($ls[0]->get_query_key, undef);
+
+# no LinkInfo
+@info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# has UrlLink!
+@urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 1);
+
+is($urls[0]->get_dbfrom, 'protein');
+is($urls[0]->get_attribute, 'registration required');
+is($urls[0]->get_icon_url, 'http://www.ncbi.nlm.nih.gov/entrez/query/egifs/http:--salilab.org-modbase-img-modbase.jpg');
+is($urls[0]->get_subject_type, 'structure');
+is($urls[0]->get_url, 'http://salilab.org/modbase/search?databaseID=1621261');
+is($urls[0]->get_link_name, undef);
+is($urls[0]->get_provider_name, 'MODBASE, Database of Comparative Protein Structure Models (Sali Lab/UCSF)');
+is($urls[0]->get_provider_abbr, 'modbase');
+is($urls[0]->get_provider_id, 5447);
+is($urls[0]->get_provider_url, 'http://salilab.org/modbase');
+is($urls[0]->get_provider_icon_url, 'http://salilab.org/modbase/img/modbase.jpg');
+
+# another LinkSet
+is(join(',',$ls[1]->get_ids), '68536103');
+is(join(',',$ls[1]->get_databases), '');
+is(join(',',$ls[1]->get_submitted_ids), '68536103');
+is($ls[1]->get_dbfrom, 'protein');
+is(join(',',$ls[1]->get_link_names), '');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 0);
+
+# no LinkInfo
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLink
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[1]->get_webenv, undef);
+is($ls[1]->get_query_key, undef);
diff --git a/t/elink_ncheck.t b/t/elink_ncheck.t
new file mode 100644
index 0000000..e6c4735
--- /dev/null
+++ b/t/elink_ncheck.t
@@ -0,0 +1,124 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: epost.t 15112 2008-12-08 18:12:38Z sendu $
+
+use strict;
+use warnings;
+
+use Test::More tests => 58;
+
+use Bio::Tools::EUtilities;
+use inc::TestHelper qw(test_input_file);
+
+# check -correspondence => 0 (default; this is set up to return the
+# exact same thing as correspondence = 1, tested below)
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_ncheck.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+
+# for lcheck, db are not returned (check is for external link in, not out)
+is(join(',',$eutil->get_databases), '');
+
+# for elinks, IDs are globbed together when called from the parser
+# unless a database is specified. Since no database is specified, all
+# ids are lumped together regardless
+is(join(',',$eutil->get_ids), '730439,68536103,1621261,20807972', 'get_ids');
+my @ls = $eutil->get_LinkSets;
+is(scalar(@ls), 4, 'uncorrelated LinkSets lump everything together');
+is(join(',',$ls[1]->get_databases), '');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+
+# check data in LinkSets
+is(join(',',$ls[0]->get_ids), '730439');
+is(join(',',$ls[0]->get_databases), '');
+is(join(',',$ls[0]->get_submitted_ids), '730439');
+is($ls[0]->get_dbfrom, 'protein');
+is(join(',',$ls[0]->get_link_names), '');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 0);
+is($ls[0]->has_neighbor, 1);
+
+# no LinkInfo
+my @info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 0);
+
+my @urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+is(join(',',$ls[1]->get_ids), '68536103');
+is(join(',',$ls[1]->get_databases), '');
+is(join(',',$ls[1]->get_submitted_ids), '68536103');
+is(join(',',$ls[1]->get_link_names), '');
+is($ls[1]->get_dbfrom, 'protein');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 1);
+
+# no LinkInfo
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# check -correspondence => 1
+$eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_ncheck_corr.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+is(join(',',$eutil->get_databases), '');
+
+# for elinks, IDs are globbed together when called from the parser unless a database is specified
+is(join(',',$eutil->get_ids), '1621261,68536103,20807972,730439', 'get_ids');
+@ls = $eutil->get_LinkSets;
+is(scalar(@ls), 4, 'correlated LinkSets separate ID data');
+is(join(',',$ls[1]->get_databases), '');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+
+# check data in LinkSets
+is(join(',',$ls[0]->get_ids), '1621261');
+is(join(',',$ls[0]->get_databases), '');
+is(join(',',$ls[0]->get_submitted_ids), '1621261');
+is(join(',',$ls[0]->get_link_names), '');
+is($ls[0]->get_dbfrom, 'protein');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 0);
+is($ls[0]->has_neighbor, 1);
+
+# no LinkInfo
+@info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[0]->get_webenv, undef);
+is($ls[0]->get_query_key, undef);
+
+is(join(',',$ls[1]->get_ids), '68536103');
+is(join(',',$ls[1]->get_databases), '');
+is(join(',',$ls[1]->get_submitted_ids), '68536103');
+is($ls[1]->get_dbfrom, 'protein');
+is(join(',',$ls[1]->get_link_names), '');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 1);
+
+# no LinkInfo
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[1]->get_webenv, undef);
+is($ls[1]->get_query_key, undef);
diff --git a/t/elink_neighbor.t b/t/elink_neighbor.t
new file mode 100644
index 0000000..9f17b6a
--- /dev/null
+++ b/t/elink_neighbor.t
@@ -0,0 +1,129 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: epost.t 15112 2008-12-08 18:12:38Z sendu $
+
+use strict;
+use warnings;
+use Test::More tests => 61;
+
+use Bio::Tools::EUtilities;
+use inc::TestHelper qw(test_input_file);
+
+# check -correspondence => 0 (default)
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_neighbor.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+is(join(',',$eutil->get_databases), 'pubmed');
+
+# for elinks, IDs are globbed together when called from the parser unless a database is specified
+is(join(',',$eutil->get_ids), '15968079,12368430,11997336,9634230,8206848,15968079,11997336', 'get_ids');
+my @ls = $eutil->get_LinkSets;
+is(scalar(@ls), 2, 'uncorrelated LinkSets lump everything together');
+is(join(',',$ls[1]->get_databases), 'pubmed');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+
+# check data in LinkSets
+# Note that retrieved IDs and submitted IDs are lumped together (don't correspond)
+is(join(',',$ls[0]->get_ids), '15968079,12368430,11997336,9634230,8206848');
+is(join(',',$ls[0]->get_databases), 'pubmed');
+is(join(',',$ls[0]->get_submitted_ids), '730439,68536103,1621261,20807972');
+is($ls[0]->get_dbfrom, 'protein');
+is(join(',',$ls[0]->get_link_names), 'protein_pubmed');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 0);
+is($ls[0]->has_neighbor, 0);
+
+# no LinkInfo
+my @info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+my @urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[0]->get_webenv, undef);
+is($ls[0]->get_query_key, undef);
+
+# next
+is(join(',',$ls[1]->get_ids), '15968079,11997336');
+is(join(',',$ls[1]->get_databases), 'pubmed');
+is(join(',',$ls[1]->get_submitted_ids), '730439,68536103,1621261,20807972');
+is(join(',',$ls[1]->get_link_names), 'protein_pubmed_refseq');
+is($ls[1]->get_dbfrom, 'protein');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 0);
+
+# no LinkInfo
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[1]->get_webenv, undef);
+is($ls[1]->get_query_key, undef);
+
+# check -correspondence => 1
+$eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_neighbor_corr.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+is(join(',',$eutil->get_databases), 'pubmed');
+
+# for elinks, IDs are globbed together when called from the parser unless a database is specified
+is(join(',',$eutil->get_ids), '12368430,9634230,15968079,15968079,11997336,11997336,8206848', 'get_ids');
+@ls = $eutil->get_LinkSets;
+is(scalar(@ls), 6, 'correlated LinkSets separate ID data');
+is(join(',',$ls[1]->get_databases), 'pubmed');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+
+# check data in LinkSets
+# Note that you can get more than one returned ID, but only one submitted ID
+is(join(',',$ls[0]->get_ids), '12368430,9634230');
+is(join(',',$ls[0]->get_submitted_ids), '1621261');
+is(join(',',$ls[0]->get_link_names), 'protein_pubmed');
+is($ls[0]->get_dbfrom, 'protein');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 0);
+is($ls[0]->has_neighbor, 0);
+
+# no LinkInfo
+@info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[0]->get_webenv, undef);
+is($ls[0]->get_query_key, undef);
+
+is(join(',',$ls[1]->get_ids), '15968079');
+is(join(',',$ls[1]->get_databases), 'pubmed');
+is(join(',',$ls[1]->get_submitted_ids), '68536103');
+is($ls[1]->get_dbfrom, 'protein');
+is(join(',',$ls[1]->get_link_names), 'protein_pubmed');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 0);
+
+# no LinkInfo
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[1]->get_webenv, undef);
+is($ls[1]->get_query_key, undef);
diff --git a/t/elink_neighbor_history.t b/t/elink_neighbor_history.t
new file mode 100644
index 0000000..4b58f57
--- /dev/null
+++ b/t/elink_neighbor_history.t
@@ -0,0 +1,134 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: epost.t 15112 2008-12-08 18:12:38Z sendu $
+
+use strict;
+use warnings;
+use Test::More tests => 63;
+
+use Bio::Tools::EUtilities;
+use inc::TestHelper qw(test_input_file);
+
+# check -correspondence => 0 (default)
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_nhist.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+is(join(',',$eutil->get_databases), 'pubmed');
+
+# for elinks, IDs are globbed together when called from the parser unless a
+# database is specified; when cmd=neighbor_history is used, no IDs come back
+# (they are stored on the server for further work)
+
+is(join(',',$eutil->get_ids), '', 'get_ids');
+my @ls = $eutil->get_LinkSets;
+is(scalar(@ls), 2, 'uncorrelated LinkSets lump everything together');
+is(join(',',$ls[1]->get_databases), 'pubmed');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::HistoryI');
+
+# check data in LinkSets
+# Note that retrieved IDs and submitted IDs are lumped together (don't correspond)
+is(join(',',$ls[0]->get_ids), '');
+is(join(',',$ls[0]->get_databases), 'pubmed');
+is(join(',',$ls[0]->get_submitted_ids), '730439,68536103,1621261,20807972');
+is($ls[0]->get_dbfrom, 'protein');
+is(join(',',$ls[0]->get_link_names), 'protein_pubmed');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 0);
+is($ls[0]->has_neighbor, 0);
+
+# no LinkInfo
+my @info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+my @urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[0]->get_webenv, '085LBC0s_G5ZenmRAnAm9dgF-TYrzyM9zVawz6_GfunjA5iasUqoGSfSzd@991070AE944054A1_0001SID');
+is($ls[0]->get_query_key, 1);
+
+# next
+is(join(',',$ls[1]->get_ids), '');
+is(join(',',$ls[1]->get_databases), 'pubmed');
+is(join(',',$ls[1]->get_submitted_ids), '730439,68536103,1621261,20807972');
+is(join(',',$ls[1]->get_link_names), 'protein_pubmed_refseq');
+is($ls[1]->get_dbfrom, 'protein');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 0);
+
+# no LinkInfo
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[1]->get_webenv, '085LBC0s_G5ZenmRAnAm9dgF-TYrzyM9zVawz6_GfunjA5iasUqoGSfSzd@991070AE944054A1_0001SID');
+is($ls[1]->get_query_key, 2);
+
+# check -correspondence => 1
+$eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_nhist_corr.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+is(join(',',$eutil->get_databases), 'pubmed');
+
+# for elinks, IDs are globbed together when called from the parser unless a database is specified
+is(join(',',$eutil->get_ids), '', 'get_ids');
+@ls = $eutil->get_LinkSets;
+is(scalar(@ls), 6, 'correlated LinkSets separate ID data');
+is(join(',',$ls[1]->get_databases), 'pubmed');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::HistoryI');
+
+# check data in LinkSets
+# Note that you can get more than one returned ID, but only one submitted ID
+is(join(',',$ls[0]->get_ids), '');
+is(join(',',$ls[0]->get_submitted_ids), '1621261');
+is(join(',',$ls[0]->get_link_names), 'protein_pubmed');
+is($ls[0]->get_dbfrom, 'protein');
+is($ls[0]->has_scores, 0);
+is($ls[0]->has_linkout, 0);
+is($ls[0]->has_neighbor, 0);
+
+# no LinkInfo
+@info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[0]->get_webenv, '0-g5Po62X-zBqwiLv9LDfH6dJvaMByxF-B7jUpwxS73UvKdcD2qdti4CNbY@03F16D1B94400731_0005SID');
+is($ls[0]->get_query_key, 1);
+
+is(join(',',$ls[1]->get_ids), '');
+is(join(',',$ls[1]->get_databases), 'pubmed');
+is(join(',',$ls[1]->get_submitted_ids), '68536103');
+is($ls[1]->get_dbfrom, 'protein');
+is(join(',',$ls[1]->get_link_names), 'protein_pubmed');
+is($ls[1]->has_scores, 0);
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 0);
+
+# no LinkInfo
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+# HistoryI
+is($ls[1]->get_webenv, '0-g5Po62X-zBqwiLv9LDfH6dJvaMByxF-B7jUpwxS73UvKdcD2qdti4CNbY@03F16D1B94400731_0005SID');
+is($ls[1]->get_query_key, 2);
diff --git a/t/elink_scores.t b/t/elink_scores.t
new file mode 100644
index 0000000..ad44568
--- /dev/null
+++ b/t/elink_scores.t
@@ -0,0 +1,106 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: epost.t 15112 2008-12-08 18:12:38Z sendu $
+
+use strict;
+use warnings;
+use Test::More tests => 56;
+
+use Bio::Tools::EUtilities;
+use inc::TestHelper qw(test_input_file);
+
+# check -correspondence => 0 (default; this is set up to return the
+# exact same thing as correspondence = 1, tested below)
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'elink',
+ -file => test_input_file('eutils','elink_scores.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Link');
+is(join(',',$eutil->get_databases), 'protein');
+
+# for elinks, IDs are globbed together when called from the parser unless a database is specified
+is(join(',',$eutil->get_ids), '15622530,15921743,70607303,68567951,145702933,'.
+ '146304683,6015889,13813749,15897502,15622530,74573864,15921743', 'get_ids');
+my @ls = $eutil->get_LinkSets;
+is(scalar(@ls), 2, 'uncorrelated LinkSets lump everything together');
+is(join(',',$ls[1]->get_databases), 'protein');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($ls[0], 'Bio::Tools::EUtilities::Link::LinkSet');
+
+# check data in LinkSets
+is(join(',',$ls[0]->get_ids), '15622530,15921743,70607303,68567951,145702933,'.
+ '146304683,6015889,13813749,15897502');
+is(join(',',$ls[0]->get_databases), 'protein');
+is(join(',',$ls[0]->get_submitted_ids), '15622530');
+is($ls[0]->get_dbfrom, 'protein');
+is(join(',',$ls[0]->get_link_names), 'protein_protein');
+is($ls[0]->has_linkout, 0);
+is($ls[0]->has_neighbor, 0);
+
+# has relatedness scores!
+is($ls[0]->has_scores, 1);
+
+my %sd = (
+ 15622530 => 2147483647,
+ 15921743 => 381,
+ 70607303 => 178,
+ 68567951 => 178,
+ 145702933 => 161,
+ 146304683 => 161,
+ 6015889 => 142,
+ 13813749 => 142,
+ 15897502 => 142);
+
+my %sc = $ls[0]->get_scores;
+for my $id ($ls[0]->get_ids) {
+ ok(exists($sc{$id}));
+ is($sc{$id}, $sd{$id});
+ delete $sd{$id};
+}
+is(keys %sd, 0);
+
+# no LinkInfo
+my @info = $ls[0]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+my @urls = $ls[0]->get_UrlLinks;
+is(scalar(@urls), 0);
+
+is(join(',',$ls[1]->get_ids), '15622530,74573864,15921743');
+is(join(',',$ls[1]->get_databases), 'protein');
+is(join(',',$ls[1]->get_submitted_ids), '15622530');
+is(join(',',$ls[1]->get_link_names), 'protein_protein_identical');
+is($ls[1]->get_webenv, undef);
+is($ls[1]->get_dbfrom, 'protein');
+is($ls[1]->has_linkout, 0);
+is($ls[1]->has_neighbor, 0);
+
+# has relatedness scores!
+is($ls[1]->has_scores, 1);
+
+%sd = (
+ 15622530 => 2147483647,
+ 74573864 => 0,
+ 15921743 => 0,
+);
+
+%sc = $ls[1]->get_scores;
+for my $id ($ls[1]->get_ids) {
+ ok(exists($sc{$id}));
+ is($sc{$id}, $sd{$id});
+ delete $sd{$id};
+}
+
+is(keys %sd, 0);
+
+# HistoryI
+is($ls[1]->get_webenv, undef);
+is($ls[1]->get_query_key, undef);
+
+# no LinkInfo
+@info = $ls[1]->get_LinkInfo;
+is(scalar(@info), 0);
+
+# no UrlLinks
+@urls = $ls[1]->get_UrlLinks;
+is(scalar(@urls), 0);
diff --git a/t/epost.t b/t/epost.t
new file mode 100644
index 0000000..b3493b1
--- /dev/null
+++ b/t/epost.t
@@ -0,0 +1,51 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: epost.t 15112 2008-12-08 18:12:38Z sendu $
+
+use strict;
+use warnings;
+
+use Test::More tests => 15;
+
+use Bio::Tools::EUtilities;
+use Bio::Tools::EUtilities::EUtilParameters;
+
+use inc::TestHelper qw(test_input_file);
+
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'epost',
+ -file => test_input_file('eutils','epost.xml'));
+
+# all parsers and data objects implement eutil() and datatype() (generally for
+# debugging purposes, but others may find them useful)
+isa_ok($eutil, 'Bio::Tools::EUtilities::EUtilDataI');
+isa_ok($eutil, 'Bio::Tools::EUtilities::Query');
+is($eutil->eutil, 'epost', 'eutil');
+is($eutil->datatype, 'query', 'datatype');
+my $history = $eutil->next_History;
+isa_ok($history, 'Bio::Tools::EUtilities::HistoryI');
+isa_ok($history, 'Bio::Tools::EUtilities::EUtilDataI');
+is($history->eutil, 'epost', 'eutil');
+is($history->datatype, 'history', 'eutil');
+
+# simple epost does not have anything other than the webenv/query_key
+is($history->get_webenv,
+ '0rACq8_iP87yHkqqm0SBaU38LzWLHIUd-J4QozMr31bh_XO5KAxLr5Q0o2e@03ED1E11941B69F1_0100SID',
+ 'get_webenv');
+is($history->get_query_key, 1, 'get_query_key');
+is(join(',',$history->history),
+ '0rACq8_iP87yHkqqm0SBaU38LzWLHIUd-J4QozMr31bh_XO5KAxLr5Q0o2e@03ED1E11941B69F1_0100SID,1', 'history');
+is($eutil->get_database, undef, 'get_database');
+is($eutil->get_ids, undef, 'get_ids');
+
+my @ids = qw(1621261 89318838 68536103 20807972 730439);
+
+# add Parameters
+my $pb = Bio::Tools::EUtilities::EUtilParameters->new(-eutil => 'epost',
+ -db => 'protein',
+ -id => \@ids);
+
+$eutil->parameter_base($pb);
+
+is($eutil->get_database, 'protein', 'get_database');
+my @ids2 = $eutil->get_ids;
+is_deeply(\@ids2, \@ids, 'get_ids');
diff --git a/t/esearch.t b/t/esearch.t
new file mode 100644
index 0000000..df8489d
--- /dev/null
+++ b/t/esearch.t
@@ -0,0 +1,91 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: esearch.t 15112 2008-12-08 18:12:38Z sendu $
+#
+
+use strict;
+use warnings;
+
+use Test::More tests => 31;
+
+use Bio::Tools::EUtilities;
+use Bio::Tools::EUtilities::EUtilParameters;
+use inc::TestHelper qw(test_input_file);
+
+my @ids = qw(6679096 31543332 134288853 483581 20805941 187951953 169158074
+123228044 148676374 114326469 148707003 187952787 123233807 148694865 148694864
+148694863 148694861 148694862 8705244 8568086);
+
+# test any Query-related methods (term related)
+
+# Normal esearch
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'esearch',
+ -file => test_input_file('eutils','esearch1.xml'));
+
+# w/o a ParameterBase, only IDs, count, retstart/retmax, optionally History
+is ($eutil->get_count, 534, 'get_count');
+my $history = $eutil->next_History;
+is($history, undef);
+my @ids2 = $eutil->get_ids;
+is_deeply(\@ids2, \@ids, 'get_ids');
+is($eutil->get_retstart, 0,'get_retstart');
+is($eutil->get_retmax, 20,'get_retmax');
+is($eutil->get_translation_from, 'Mus musculus','get_translation_from');
+is($eutil->get_translation_to, '("Mus musculus"[Organism:__txid10090] OR Mus musculus)','get_translation_to');
+
+# the database isn't carried into the parsers unless a EUtilParameters is present
+is($eutil->get_db, undef, 'get_db');
+is($eutil->get_database, undef, 'get_database');
+is($eutil->get_term, undef,'get_term');
+
+# add Parameters
+my $pb = Bio::Tools::EUtilities::EUtilParameters->new(-eutil => 'esearch',
+ -db => 'protein',
+ -term => 'Notch AND Mus musculus');
+
+$eutil->parameter_base($pb);
+
+# now will work...
+is($eutil->get_db, 'protein', 'get_db');
+is($eutil->get_database, 'protein', 'get_database');
+is($eutil->get_term, 'Notch AND Mus musculus','get_term');
+
+# espell only (should be undef)
+is($eutil->get_corrected_query, undef ,'get_corrected_query');
+is($eutil->get_replaced_terms, undef ,'get_replaced_terms');
+
+# test esearch data with History
+$eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'esearch',
+ -file => test_input_file('eutils','esearch2.xml'));
+
+is ($eutil->get_count, 534, 'get_count');
+$history = $eutil->next_History;
+isa_ok($history, 'Bio::Tools::EUtilities::HistoryI');
+is($history->get_webenv,
+ '00m7eJh8lyG3wiC2SE2hd7Im_w5o3z3q4_JK9-Rn266ix_eRXkjNOYQxHp@03F17619941CFD71_0005SID',
+ 'get_webenv');
+is($history->get_query_key, 1, 'get_query_key');
+is(join(',',$history->history),
+ '00m7eJh8lyG3wiC2SE2hd7Im_w5o3z3q4_JK9-Rn266ix_eRXkjNOYQxHp@03F17619941CFD71_0005SID,1', 'history');
+
+@ids2 = $eutil->get_ids;
+is_deeply(\@ids2, \@ids, 'get_ids');
+is($eutil->get_retstart, 0,'get_retstart');
+is($eutil->get_retmax, 20,'get_retmax');
+is($eutil->get_translation_from, 'Mus musculus','get_translation_from');
+is($eutil->get_translation_to, '("Mus musculus"[Organism:__txid10090] OR Mus musculus)','get_translation_to');
+
+# the database isn't carried into the parsers
+is($eutil->get_db, undef, 'get_db');
+is($eutil->get_database, undef, 'get_database');
+
+# the term isn't carried into the parsers
+is($eutil->get_term, undef,'get_term');
+
+# espell only (should be undef)
+is($eutil->get_corrected_query, undef ,'get_corrected_query');
+is($eutil->get_replaced_terms, undef ,'get_replaced_terms');
+
+my @qs = $eutil->get_GlobalQueries;
+is(scalar(@qs), 0, 'get_GlobalQueries')
diff --git a/t/espell.t b/t/espell.t
new file mode 100644
index 0000000..c699133
--- /dev/null
+++ b/t/espell.t
@@ -0,0 +1,48 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: espell.t 15112 2008-12-08 18:12:38Z sendu $
+#
+
+use strict;
+use warnings;
+
+use Test::More tests => 20;
+
+use Bio::Tools::EUtilities;
+use Bio::Tools::EUtilities::EUtilParameters;
+use inc::TestHelper qw(test_input_file);
+
+# Normal espell
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'espell',
+ -file => test_input_file('eutils','espell.xml'));
+
+is($eutil->get_db, 'pubmed', 'get_db');
+is(($eutil->get_dbs)[0], 'pubmed', 'get_dbs');
+is($eutil->get_database, 'pubmed', 'get_database');
+is(($eutil->get_databases)[0], 'pubmed', 'get_databases');
+is($eutil->get_term, 'Netch AND Mus musclus','get_term');
+is($eutil->get_corrected_query, 'notch AND mus musculus' ,'get_corrected_query');
+is(scalar($eutil->get_replaced_terms), 2,'get_replaced_terms');
+is(join(',',$eutil->get_replaced_terms), 'notch,musculus','get_replaced_terms');
+
+# everything else undef or 0
+is ($eutil->get_count, undef, 'get_count');
+my $history = $eutil->next_History;
+is($history, undef);
+my @ids2 = $eutil->get_ids;
+is(scalar(@ids2), 0, 'get_ids');
+is($eutil->get_retstart, undef,'get_retstart');
+is($eutil->get_retmax, undef,'get_retmax');
+is($eutil->get_translation_from, undef,'get_translation_from');
+is($eutil->get_translation_to, undef,'get_translation_to');
+
+# add Parameters
+my $pb = Bio::Tools::EUtilities::EUtilParameters->new(-eutil => 'espell',
+ -db => 'protein',
+ -term => 'Notch AND Mus musculus');
+
+is($eutil->get_db, 'pubmed', 'get_db');
+is(($eutil->get_dbs)[0], 'pubmed', 'get_dbs');
+is($eutil->get_database, 'pubmed', 'get_database');
+is(($eutil->get_databases)[0], 'pubmed', 'get_databases');
+is($eutil->get_term, 'Netch AND Mus musclus','get_term');
diff --git a/t/esummary.t b/t/esummary.t
new file mode 100644
index 0000000..c92da99
--- /dev/null
+++ b/t/esummary.t
@@ -0,0 +1,187 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: epost.t 15112 2008-12-08 18:12:38Z sendu $
+
+use strict;
+use warnings;
+use Test::More tests => 81;
+
+use Bio::Tools::EUtilities;
+use Bio::Tools::EUtilities::EUtilParameters;
+use inc::TestHelper qw(test_input_file);
+
+my $eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'esummary',
+ -file => test_input_file('eutils','esummary1.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Summary');
+
+# note that XML output does not contain the database; in order to retrieve this
+# (and similar missing params) one should pass in the EUtilParameters object
+
+is(join(',',$eutil->get_databases), '');
+
+# we'll add in some parameters (normally passed in via Bio::DB::EUtilities)
+my $p = Bio::Tools::EUtilities::EUtilParameters->new(
+ -eutil => 'esummary',
+ -db => 'protein',
+ -id => [1621261,89318838,68536103,20807972,730439]);
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Summary');
+
+$eutil->parameter_base($p);
+is(join(',',$eutil->get_databases), 'protein');
+
+# for esummary, DocSums contain the IDs, but we glob them together when called from the parser
+is(join(',',$eutil->get_ids), '1621261,89318838,68536103,20807972,730439', 'get_ids');
+
+my @ds = $eutil->get_DocSums;
+is(scalar(@ds), 5);
+
+isa_ok($ds[0], 'Bio::Tools::EUtilities::Summary::DocSum');
+
+# One ID per DocSum (get_ids is implemented JIC)
+is($ds[0]->get_id, '1621261');
+is(join(',',$ds[0]->get_ids), '1621261');
+
+# test two DocSums: get Items
+my @items = $ds[0]->get_all_Items;
+is(scalar(@items), 12);
+
+isa_ok($items[0], 'Bio::Tools::EUtilities::Summary::Item');
+# each Item has four possible pieces of data: ID, name, type, and content
+# also, an Item may have sub-Items (up to 3 hierarchal layers: Item, ListItem, StructureItem)
+is($items[0]->get_id,1621261);
+is($items[0]->get_name,'Caption');
+is($items[0]->get_type,'String');
+is($items[0]->get_content,'CAB02640');
+is(scalar($items[0]->get_ListItems), 0);
+
+is($items[3]->get_id,1621261);
+is($items[3]->get_name,'Gi');
+is($items[3]->get_type,'Integer');
+is($items[3]->get_content,1621261);
+is(scalar($items[3]->get_ListItems), 0);
+
+is($items[7]->get_id,1621261);
+is($items[7]->get_name,'TaxId');
+is($items[7]->get_type,'Integer');
+is($items[7]->get_content,83332);
+is(scalar($items[7]->get_ListItems), 0);
+
+@items = $ds[2]->get_all_Items;
+is(scalar(@items), 12);
+
+isa_ok($items[0], 'Bio::Tools::EUtilities::Summary::Item');
+# each Item has four possible pieces of data: ID, name, type, and content
+# also, an Item may have sub-Items (up to 3 hierarchal layers: Item, ListItem, StructureItem)
+is($items[0]->get_id,68536103);
+is($items[0]->get_name,'Caption');
+is($items[0]->get_type,'String');
+is($items[0]->get_content,'YP_250808');
+is(scalar($items[0]->get_ListItems), 0);
+
+is($items[3]->get_id,68536103);
+is($items[3]->get_name,'Gi');
+is($items[3]->get_type,'Integer');
+is($items[3]->get_content,68536103);
+is(scalar($items[3]->get_ListItems), 0);
+
+is($items[7]->get_id,68536103);
+is($items[7]->get_name,'TaxId');
+is($items[7]->get_type,'Integer');
+is($items[7]->get_content,306537);
+is(scalar($items[7]->get_ListItems), 0);
+
+# getting data directly from DocSum
+
+is($ds[0]->get_type_by_name('Gi'), 'Integer');
+is(join(',',$ds[0]->get_contents_by_name('CreateDate')), '2003/11/21');
+
+is($ds[1]->get_type_by_name('Status'), 'String');
+is(join(',',$ds[1]->get_contents_by_name('Extra')), 'gi|89318838|gb|EAS10332.1|[89318838]');
+
+is($ds[3]->get_type_by_name('TaxId'), 'Integer');
+is(join(',',$ds[3]->get_contents_by_name('Title')), 'pyrimidine regulatory protein PyrR [Thermoanaerobacter tengcongensis MB4]');
+
+
+$eutil = Bio::Tools::EUtilities->new(
+ -eutil => 'esummary',
+ -file => test_input_file('eutils','esummary2.xml'));
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Summary');
+
+# note that XML output does not contain the database; in order to retrieve this
+# (and similar missing params) one should pass in the EUtilParameters object
+
+is(join(',',$eutil->get_databases), '');
+
+# we'll add in some parameters (normally passed in via Bio::DB::EUtilities)
+$p = Bio::Tools::EUtilities::EUtilParameters->new(
+ -eutil => 'esummary',
+ -db => 'homologene');
+
+isa_ok($eutil, 'Bio::Tools::EUtilities::Summary');
+
+$eutil->parameter_base($p);
+is(join(',',$eutil->get_databases), 'homologene');
+
+# for esummary, DocSums contain the IDs, but we glob them together when called from the parser
+is(join(',',$eutil->get_ids), '32049,45614', 'get_ids');
+
+@ds = $eutil->get_DocSums;
+is(scalar(@ds), 2);
+
+isa_ok($ds[0], 'Bio::Tools::EUtilities::Summary::DocSum');
+
+# One ID per DocSum (get_ids is implemented JIC)
+is($ds[0]->get_id, '32049');
+is(join(',',$ds[0]->get_ids), '32049');
+
+# flattened list
+@items = $ds[0]->get_all_Items;
+is(scalar(@items), 62);
+
+# Items are layered when calling get_Items; flattened list is not the same as
+# normal list
+@items = $ds[0]->get_Items;
+is(scalar(@items), 2);
+
+isa_ok($items[0], 'Bio::Tools::EUtilities::Summary::Item');
+# each Item has four possible pieces of data: ID, name, type, and content
+# also, an Item may have sub-Items (up to 3 hierarchal layers: Item, ListItem, StructureItem)
+is($items[0]->get_id,32049);
+is($items[0]->get_name,'HomoloGeneDataList');
+is($items[0]->get_type,'List');
+is($items[0]->get_content, undef); # List contents are other Items
+
+# access List layer from top Item
+my @li = $items[0]->get_ListItems;
+is(scalar(@li), 10);
+@li = $items[0]->get_Items;
+is(scalar(@li), 10);
+
+# access Structure Layer from List Item
+my @si = $li[1]->get_StructureItems;
+is(scalar(@si), 5);
+@si = $li[1]->get_StructureItems;
+is(scalar(@si), 5);
+
+# test List Item
+is($li[1]->get_id,32049);
+is($li[1]->get_name,'HomoloGeneData');
+is($li[1]->get_type,'Structure');
+is($li[1]->get_content,undef); # Structure contents are other Items
+
+# test Structure Item
+is($si[2]->get_id,32049);
+is($si[2]->get_name,'Symbol');
+is($si[2]->get_type,'String');
+is($si[2]->get_content,'NOTCH1');
+
+# getting data directly from DocSum
+
+is($ds[0]->get_type_by_name('HomoloGeneData'), 'Structure');
+is(join(',',$ds[0]->get_contents_by_name('Symbol')), 'NOTCH1,NOTCH1,NOTCH1,NOTCH1,Notch1,Notch1,NOTCH1,notch1b,N,AgaP_AGAP001015');
+
+is($ds[1]->get_type_by_name('HomoloGeneDataList'), 'List');
+is(join(',',$ds[1]->get_contents_by_name('TaxId')), '9606,9913,10090,10116,9031,7955');
diff --git a/t/release-EUtilities.t b/t/release-EUtilities.t
new file mode 100644
index 0000000..fab35d1
--- /dev/null
+++ b/t/release-EUtilities.t
@@ -0,0 +1,458 @@
+
+BEGIN {
+ unless ($ENV{RELEASE_TESTING}) {
+ print "1..0 # SKIP these tests are for release candidate testing\n";
+ exit
+ }
+}
+
+use strict;
+use warnings;
+use Test::Most;
+use Bio::DB::EUtilities; # use_ok tested in 00-compile.t during release
+
+my $test_ct = 1; # base number of tests (those not in blocks)
+
+my %eutils = ( # per-eutil test count and the sub that runs those tests
+ 'efetch' => {'tests' => 5,
+ 'sub' => \&efetch},
+ 'epost' => {'tests' => 11,
+ 'sub' => \&epost},
+ 'esummary' => {'tests' => 254,
+ 'sub' => \&esummary},
+ 'esearch' => {'tests' => 13,
+ 'sub' => \&esearch},
+ 'einfo' => {'tests' => 10,
+ 'sub' => \&einfo},
+ 'elink1' => {'tests' => 8,
+ 'sub' => \&elink1},
+ 'egquery' => {'tests' => 4,
+ 'sub' => \&egquery},
+); # elink2 (defined further down) is not registered here, so it is never run
+$test_ct += $eutils{$_}->{'tests'} for (keys %eutils); # plan = base + sum of the 'tests' entries
+
+plan tests => $test_ct;
+
+my $debug = $ENV{BIOPERLDEBUG} || $ENV{BIOPERL_DEBUG} || 0;
+my $email = $ENV{BIOPERL_EMAIL};
+
+my ($eutil, $response); # file-level lexicals shared by the test subs below
+
+# protein acc
+my @acc = qw(MUSIGHBA1 P18584 CH402638);
+
+# protein GI
+my @ids = sort qw(1621261 89318838 68536103 20807972 730439); # default sort, i.e. string order
+
+# test search term
+my $term = 'dihydroorotase AND human';
+
+my %dbs = (taxonomy => 1,
+ nucleotide => 1,
+ pubmed => 1);
+my %links = (protein_taxonomy => 1,
+ protein_nucleotide => 1,
+ protein_nucleotide_wgs => 1,
+ protein_pubmed => 1,
+ protein_pubmed_refseq => 1
+ );
+
+SKIP: {
+ ok($email, 'Make sure email is set (set BIOPERL_EMAIL)');
+ skip("Must provide a valid email for tests", $test_ct - 1) if !defined($email); # everything except the ok() above
+ diag("Using $email for tests") if $debug;
+
+ # this loops through the required tests, only running what is in %eutils
+ for my $test (keys %eutils) {
+ $eutils{$test}->{'sub'}->();
+ }
+}
+# Simple EFetch: fetch one protein as FASTA, then reuse the agent for a GenBank fetch (5 tests)
+
+sub efetch {
+ SKIP: {
+ $eutil = Bio::DB::EUtilities->new(
+ -db => 'protein',
+ -id => [$ids[0]],
+ -rettype => 'fasta',
+ -email => $email
+ );
+
+ isa_ok($eutil, 'Bio::DB::GenericWebAgent');
+ eval {$response = $eutil->get_Response; };
+ skip("EFetch HTTP error: $@", 4) if $@; # 4 tests remain after the isa_ok above
+ isa_ok($response, 'HTTP::Response');
+ my $content = $response->content;
+ like($content, qr(PYRR \[Mycobacterium tuberculosis H37Rv\]),
+ 'EFetch: Fasta format');
+
+ # reuse the EUtilities webagent with a different ID and return type
+ $eutil->parameter_base->id([$ids[1]]);
+ $eutil->parameter_base->rettype('gb');
+ eval {$response = $eutil->get_Response; };
+ skip("EFetch HTTP error: $@", 2) if $@; # the two GenBank checks below
+ isa_ok($response, 'HTTP::Response');
+ $content = $response->content;
+ like($content, qr(^LOCUS\s+NP_623143),'EFetch: GenBank format');
+ }
+}
+
+# EPost->EFetch with History: post @ids, check the History, then fetch via it (11 tests)
+
+sub epost {
+ SKIP: {
+ $eutil = Bio::DB::EUtilities->new(
+ -eutil => 'epost',
+ -db => 'protein',
+ -id => \@ids,
+ -email => $email
+ );
+
+ isa_ok($eutil, 'Bio::DB::GenericWebAgent');
+ eval {$response = $eutil->get_Response; };
+ skip("EPost HTTP error: $@", 10) if $@; # 10 tests remain after the isa_ok above
+ isa_ok($response, 'HTTP::Response');
+ # Any parameters are passed in to the parser, so these should be set.
+ # Databases and IDs always default back to the submitted ones unless
+ # the data being retrieved are IDs or contain new IDs (esearch, elink)
+
+ is($eutil->get_database, 'protein', '$epost->get_database()');
+ is(join(',',$eutil->get_ids), '1621261,20807972,68536103,730439,89318838', '$epost->get_ids()');
+
+ # these are the submitted IDs
+ is($eutil->get_count, 5, '$epost->get_count()');
+
+ # these are not set using epost
+ is($eutil->get_term, undef, '$epost->get_term()');
+
+ my $history = $eutil->next_History;
+ is($history->eutil, 'epost', 'History->eutil()');
+ isa_ok($history, 'Bio::Tools::EUtilities::HistoryI');
+
+ # check the actual History
+ my ($webenv, $key) = $history->history;
+ like($webenv, qr{^\S{25}}, '$epost WebEnv'); # at least 25 leading non-space characters
+ like($key, qr{^\d+}, '$epost query key');
+
+ # can we fetch the sequences?
+ $eutil->set_parameters(
+ -eutil => 'efetch',
+ -history => $history,
+ -rettype => 'fasta'
+ );
+ # look for fasta headers
+ my ($r, $t);
+ eval{ $r = $eutil->get_Response->content;};
+ skip("EPost HTTP error", 1) if $@; # only the header-count check is left
+ $t = grep m{^>.*$}, split("\n", $r); # scalar grep: number of lines starting with '>'
+ is($t, 5, 'EPost to EFetch');
+ }
+}
+
+# ESummary: check DocSum IDs, item names and item types for the protein GIs in @ids (254 tests)
+
+sub esummary {
+ my %docsum = (1621261=> { 'Caption' => ['String','CAB02640'],
+ 'Title' => ['String','PROBABLE PYRIMIDINE OPERON REGULATORY PROTEIN PYRR '.
+ '[Mycobacterium tuberculosis H37Rv]'],
+ 'Extra' => ['String','gi|1621261|emb|CAB02640.1|[1621261]'],
+ 'Gi' => ['Integer','1621261'],
+ 'CreateDate' => ['String','2003/11/21'],
+ 'UpdateDate' => ['String','2005/04/17'],
+ 'Flags' => ['Integer',''],
+ 'TaxId' => ['Integer','83332'],
+ 'Length' => ['Integer','193'],
+ 'Status' => ['String','live'],
+ 'ReplacedBy' => ['String',''],
+ 'Comment' => ['String',''], },
+ 20807972 => {'Caption' => ['String','NP_623143'],
+ 'Title' => ['String','pyrimidine regulatory protein PyrR '.
+ '[Thermoanaerobacter tengcongensis MB4]'],
+ 'Extra' => ['String','gi|20807972|ref|NP_623143.1|[20807972]'],
+ 'Gi' => ['Integer','20807972'],
+ 'CreateDate' => ['String','2002/05/09'],
+ 'UpdateDate' => ['String','2005/12/03'],
+ 'Flags' => ['Integer','512'],
+ 'TaxId' => ['Integer','273068'],
+ 'Length' => ['Integer','178'],
+ 'Status' => ['String','live'],
+ 'ReplacedBy' => ['String',''],
+ 'Comment' => ['String',''], },
+ 68536103 => {'Caption' => ['String','YP_250808'],
+ 'Title' => ['String','putative pyrimidine operon regulatory protein '.
+ '[Corynebacterium jeikeium K411]'],
+ 'Extra' => ['String','gi|68536103|ref|YP_250808.1|[68536103]'],
+ 'Gi' => ['Integer','68536103'],
+ 'CreateDate' => ['String','2005/07/04'],
+ 'UpdateDate' => ['String','2006/03/30'],
+ 'Flags' => ['Integer','512'],
+ 'TaxId' => ['Integer','306537'],
+ 'Length' => ['Integer','195'],
+ 'Status' => ['String','live'],
+ 'ReplacedBy' => ['String',''],
+ 'Comment' => ['String',''], },
+ 730439 => {'Caption' => ['String','P41007'],
+ 'Title' => ['String','PyrR bifunctional protein '.
+ '[Includes: Pyrimidine operon regulatory protein; '.
+ 'Uracil phosphoribosyltransferase (UPRTase)]'],
+ 'Extra' => ['String','gi|730439|sp|P41007|PYRR_BACCL[730439]'],
+ 'Gi' => ['Integer','730439'],
+ 'CreateDate' => ['String','1995/02/01'],
+ 'UpdateDate' => ['String','2006/07/25'],
+ 'Flags' => ['Integer',''],
+ 'TaxId' => ['Integer','1394'],
+ 'Length' => ['Integer','179'],
+ 'Status' => ['String','live'],
+ 'ReplacedBy' => ['String',''],
+ 'Comment' => ['String',''] },
+ 89318838 => { 'Caption' => ['String','EAS10332'],
+ 'Title' => ['String','Phosphoribosyltransferase '.
+ '[Mycobacterium gilvum PYR-GCK]'],
+ 'Extra' => ['String','gi|89318838|gb|EAS10332.1|[89318838]'],
+ 'Gi' => ['Integer','89318838'],
+ 'CreateDate' => ['String','2006/03/09'],
+ 'UpdateDate' => ['String','2006/03/09'],
+ 'Flags' => ['Integer',''],
+ 'TaxId' => ['Integer','350054'],
+ 'Length' => ['Integer','193'],
+ 'Status' => ['String','live'],
+ 'ReplacedBy' => ['String',''],
+ 'Comment' => ['String',''] } );
+ SKIP: {
+ $eutil = Bio::DB::EUtilities->new(
+ -eutil => 'esummary',
+ -db => 'protein',
+ -id => \@ids,
+ -email => $email
+ );
+ isa_ok($eutil, 'Bio::DB::GenericWebAgent');
+
+ eval {$response = $eutil->get_Response; };
+ skip("ESummary HTTP error:$@", 253) if $@; # 253 tests remain after the isa_ok above
+ isa_ok($response, 'HTTP::Response');
+
+ my @docs = $eutil->get_DocSums();
+ is(scalar(@docs), 5, '$esum->get_DocSums()');
+
+ my $ct = 0; # counts every item name visited across all DocSums
+ while (my $ds = $eutil->next_DocSum) {
+ isa_ok($ds, 'Bio::Tools::EUtilities::Summary::DocSum');
+
+ my $id = $ds->get_id();
+ ok(exists($docsum{$id}), '$docsum->get_id()');
+
+ my %items = %{ $docsum{$id} };
+
+ # iterate using item names; only the type slot ([0]) of the fixture is compared
+
+ for my $name ($ds->get_all_names()) {
+ $ct++;
+ my ($it) = $ds->get_Items_by_name($name);
+ ok(exists $items{$name},'DocSum Name exists');
+ is($it->get_name, $name, 'get_name(),DocSum Name');
+ is($ds->get_type_by_name($name), $items{$name}->[0],
+ 'get_type_by_name() from DocSum');
+ is($it->get_type, $items{$name}->[0], 'get_type() from Item');
+ }
+ }
+ is($ct, 60); # 5 DocSums x the 12 item names listed per ID in %docsum
+ }
+}
+
+# ESearch, ESearch History: plain search, then a usehistory search fed into EFetch (13 tests)
+
+sub esearch {
+ SKIP: {
+ $eutil = Bio::DB::EUtilities->new(
+ -eutil => 'esearch',
+ -db => 'protein',
+ -term => $term,
+ -retmax => 100,
+ -email => $email
+ );
+
+ isa_ok($eutil, 'Bio::DB::GenericWebAgent');
+ eval {$response = $eutil->get_Response; };
+ skip("ESearch HTTP error:$@", 12) if $@; # 12 tests remain after the isa_ok above
+ isa_ok($response, 'HTTP::Response');
+
+ # can't really check for specific ID's but can check total ID's returned
+ my @esearch_ids = $eutil->get_ids;
+ is(scalar(@esearch_ids), 100, '$esearch->get_ids()');
+
+ cmp_ok($eutil->get_count, '>', 117, '$esearch->get_count()');
+
+ # usehistory
+ $eutil = Bio::DB::EUtilities->new(
+ -eutil => 'esearch',
+ -db => 'protein',
+ -usehistory => 'y',
+ -term => $term,
+ -retmax => 100,
+ -email => $email
+ );
+
+ eval {$response = $eutil->get_Response; };
+ skip("ESearch HTTP error:$@", 9) if $@; # 4 of the 13 tests have already run
+ is($eutil->eutil, 'esearch', 'eutil()');
+ is($eutil->get_database, 'protein', 'get_database()');
+ cmp_ok($eutil->get_count, '>', 117, 'get_count()');
+ is($eutil->get_term, $term, 'get_term()');
+ is($eutil->get_ids, 100, 'History->get_ids()');
+
+ my $history = $eutil->next_History;
+ isa_ok($history, 'Bio::Tools::EUtilities::HistoryI');
+
+ # check the actual data
+ my ($webenv, $key) = $history->history;
+ like($webenv, qr{^\S{15}}, 'WebEnv');
+ like($key, qr{^\d+}, 'query key');
+
+ # can we fetch the sequences?
+ $eutil->set_parameters(
+ -eutil => 'efetch',
+ -history => $history,
+ -rettype => 'fasta',
+ -retmax => 5
+ );
+ # look for fasta headers
+ my ($r, $t);
+ eval{ $r = $eutil->get_Response->content;};
+ skip("ESearch to EFetch HTTP error: $@", 1) if $@; # was mislabelled "EPost"
+ $t = grep m{^>.*$}, split("\n", $r); # scalar grep: number of lines starting with '>'
+ is($t, 5, 'ESearch to EFetch');
+ }
+}
+
+# EInfo: details for the protein database, then the list of all databases (10 tests)
+
+sub einfo {
+ SKIP: {
+ $eutil = Bio::DB::EUtilities->new(
+ -eutil => 'einfo',
+ -db => 'protein',
+ -email => $email
+ );
+ isa_ok($eutil, 'Bio::DB::GenericWebAgent');
+ eval {$response = $eutil->get_Response; };
+ skip("EInfo HTTP error:$@", 9) if $@; # 9 (not 10) tests remain after the isa_ok above
+ isa_ok($response, 'HTTP::Response');
+ like($response->content, qr(<eInfoResult>), 'EInfo response');
+ is(($eutil->get_database)[0], 'protein', '$einfo->get_database()');
+ like($eutil->get_last_update, qr(\d{4}\/\d{2}\/\d{2}\s\d{2}:\d{2}),
+ '$einfo->get_last_update()');
+ cmp_ok($eutil->get_record_count, '>', 9200000, '$einfo->get_record_count()');
+ is($eutil->get_description, 'Protein sequence record', '$einfo->get_description()');
+ my @links = $eutil->get_LinkInfo;
+ my @fields = $eutil->get_FieldInfo;
+ cmp_ok(scalar(@links), '>',30, '$einfo->get_LinkInfo()');
+ cmp_ok(scalar(@fields), '>',24, '$einfo->get_FieldInfo()');
+
+ # all databases (list)
+ $eutil = Bio::DB::EUtilities->new(
+ -eutil => 'einfo',
+ -email => $email
+ );
+
+ eval {$response = $eutil->get_Response; };
+ skip("EInfo HTTP error:$@", 1) if $@; # only the database-count check is left
+
+ my @db = sort qw(pubmed protein nucleotide nuccore nucgss nucest structure
+ genome books cancerchromosomes cdd domains gene genomeprj gensat
+ geo gds homologene journals mesh ncbisearch nlmcatalog omia omim
+ pmc popset probe pcassay pccompound pcsubstance snp taxonomy toolkit
+ unigene unists);
+
+ my @einfo_dbs = sort $eutil->get_databases;
+ cmp_ok(scalar(@einfo_dbs), '>=', scalar(@db), 'All EInfo databases'); # compares counts only, not names
+ }
+}
+
+
+# ELink - normal (single ID array) - single db - ElinkData tests (8 tests)
+
+sub elink1 {
+ SKIP: {
+ $eutil = Bio::DB::EUtilities->new(
+ -eutil => 'elink',
+ -db => 'taxonomy',
+ -dbfrom => 'protein',
+ -id => \@ids,
+ -email => $email
+ );
+
+ isa_ok($eutil, 'Bio::DB::GenericWebAgent');
+ eval {$response = $eutil->get_Response; };
+ skip("ELink HTTP error:$@", 7) if $@; # 7 tests remain after the isa_ok above
+ isa_ok($response, 'HTTP::Response');
+ like($response->content, qr(<eLinkResult>), 'ELink response');
+ # Data is too volatile to test; commenting for now...
+ #my @ids2 = qw(350054 306537 273068 83332 1394);
+ cmp_ok($eutil->get_ids, '>=', 1);
+ #is_deeply([sort $eutil->get_ids], [sort @ids2],'$elink->get_ids()');
+
+ # Linkset tests
+ is($eutil->get_LinkSets, 1, '$elink->get_LinkSets()');
+ my $linkobj = $eutil->next_LinkSet;
+ isa_ok($linkobj, 'Bio::Tools::EUtilities::Link::LinkSet');
+ is($linkobj->get_dbfrom, 'protein', '$linkdata->get_dbfrom()'); # matches -dbfrom above
+ #is_deeply([sort $linkobj->elink_queryids],
+ # [sort @ids], '$linkdata->elink_queryids()');
+ my $db = $linkobj->get_dbto;
+ is($db, 'taxonomy', '$linkdata->get_dbto()'); # matches -db above
+ #is_deeply([sort $linkobj->get_LinkIds_by_db($db)],
+ # [sort @ids2], '$linkdata->get_LinkIds_by_db($db)');
+ }
+}
+
+sub elink2 { # ELink genomeprj -> nuccore (7 tests); not listed in %eutils, so currently never run
+ my @genome_ids = qw(30807 33011 12997 16707 45843 31129 31141 31131 31133 32203 31135);
+ SKIP: {
+ $eutil = Bio::DB::EUtilities->new(
+ -eutil => 'elink',
+ -db => 'nuccore',
+ -dbfrom => 'genomeprj',
+ -id => \@genome_ids, # pass a reference: a bare array flattens into bogus key/value pairs
+ -email => $email
+ );
+
+ eval {$response = $eutil->get_Response; };
+ skip("ELink HTTP error:$@", 7) if $@; # all 7 tests of this sub follow the request
+ isa_ok($response, 'HTTP::Response');
+ like($response->content, qr(<eLinkResult>), 'ELink response');
+ # Data is too volatile to test; commenting for now...
+ #my @ids2 = qw(350054 306537 273068 83332 1394);
+ cmp_ok($eutil->get_ids, '>=', 4);
+ #is_deeply([sort $eutil->get_ids], [sort @ids2],'$elink->get_ids()');
+
+ # Linkset tests
+ is($eutil->get_LinkSets, 1, '$elink->get_LinkSets()');
+ my $linkobj = $eutil->next_LinkSet;
+ isa_ok($linkobj, 'Bio::Tools::EUtilities::Link::LinkSet');
+ is($linkobj->get_dbfrom, 'genomeprj', '$linkdata->get_dbfrom()'); # expectation now matches -dbfrom above
+ #is_deeply([sort $linkobj->elink_queryids],
+ # [sort @ids], '$linkdata->elink_queryids()');
+ my $db = $linkobj->get_dbto;
+ is($db, 'nuccore', '$linkdata->get_dbto()'); # expectation now matches -db above
+ #is_deeply([sort $linkobj->get_LinkIds_by_db($db)],
+ # [sort @ids2], '$linkdata->get_LinkIds_by_db($db)');
+ }
+}
+
+sub egquery { # EGQuery: run $term as a global query and count the GlobalQuery results (4 tests)
+ SKIP: {
+ $eutil = Bio::DB::EUtilities->new(
+ -eutil => 'egquery',
+ -term => $term,
+ -email => $email
+ );
+
+ isa_ok($eutil, 'Bio::DB::GenericWebAgent');
+ eval {$response = $eutil->get_Response; };
+ skip("EGQuery HTTP error:$@", 3) if $@; # 3 tests remain after the isa_ok above
+ isa_ok($response, 'HTTP::Response');
+ like($response->content, qr(<eGQueryResult>), 'EGQuery response');
+ my @gq = $eutil->get_GlobalQueries;
+ cmp_ok(scalar(@gq), '>=', 30, 'get_GlobalQueries')
+ }
+}
diff --git a/t/release-eol.t b/t/release-eol.t
new file mode 100644
index 0000000..4e06137
--- /dev/null
+++ b/t/release-eol.t
@@ -0,0 +1,85 @@
+# Release-only test: passes each listed file to Test::EOL's eol_unix_ok with trailing_whitespace => 1.
+BEGIN {
+    if ( !$ENV{RELEASE_TESTING} ) {    # not a release run: print a skip-all plan and stop
+        print "1..0 # SKIP these tests are for release candidate testing\n";
+        exit;
+    }
+}
+
+use strict;
+use warnings;
+
+# this test was generated with Dist::Zilla::Plugin::EOLTests 0.19
+
+use Test::More 0.88;
+use Test::EOL;
+
+my @files = qw(
+    bin/bp_einfo
+    bin/bp_genbank_ref_extractor
+    lib/Bio/DB/EUtilities.pm
+    lib/Bio/Tools/EUtilities.pm
+    lib/Bio/Tools/EUtilities/EUtilDataI.pm
+    lib/Bio/Tools/EUtilities/EUtilParameters.pm
+    lib/Bio/Tools/EUtilities/History.pm
+    lib/Bio/Tools/EUtilities/HistoryI.pm
+    lib/Bio/Tools/EUtilities/Info.pm
+    lib/Bio/Tools/EUtilities/Info/FieldInfo.pm
+    lib/Bio/Tools/EUtilities/Info/LinkInfo.pm
+    lib/Bio/Tools/EUtilities/Link.pm
+    lib/Bio/Tools/EUtilities/Link/LinkSet.pm
+    lib/Bio/Tools/EUtilities/Link/UrlLink.pm
+    lib/Bio/Tools/EUtilities/Query.pm
+    lib/Bio/Tools/EUtilities/Query/GlobalQuery.pm
+    lib/Bio/Tools/EUtilities/Summary.pm
+    lib/Bio/Tools/EUtilities/Summary/DocSum.pm
+    lib/Bio/Tools/EUtilities/Summary/Item.pm
+    lib/Bio/Tools/EUtilities/Summary/ItemContainerI.pm
+    t/00-compile.t
+    t/EUtilParameters.t
+    t/author-mojibake.t
+    t/author-pod-syntax.t
+    t/data/eutils/egquery.xml
+    t/data/eutils/einfo.xml
+    t/data/eutils/einfo_dbs.xml
+    t/data/eutils/elink_acheck.xml
+    t/data/eutils/elink_acheck_corr.xml
+    t/data/eutils/elink_dball.xml
+    t/data/eutils/elink_lcheck.xml
+    t/data/eutils/elink_lcheck_corr.xml
+    t/data/eutils/elink_llinks.xml
+    t/data/eutils/elink_llinks_corr.xml
+    t/data/eutils/elink_multidb.xml
+    t/data/eutils/elink_multidb_corr.xml
+    t/data/eutils/elink_ncheck.xml
+    t/data/eutils/elink_ncheck_corr.xml
+    t/data/eutils/elink_neighbor.xml
+    t/data/eutils/elink_neighbor_corr.xml
+    t/data/eutils/elink_nhist.xml
+    t/data/eutils/elink_nhist_corr.xml
+    t/data/eutils/elink_scores.xml
+    t/data/eutils/epost.xml
+    t/data/eutils/esearch1.xml
+    t/data/eutils/esearch2.xml
+    t/data/eutils/espell.xml
+    t/data/eutils/esummary1.xml
+    t/data/eutils/esummary2.xml
+    t/egquery.t
+    t/einfo.t
+    t/elink_acheck.t
+    t/elink_lcheck.t
+    t/elink_llinks.t
+    t/elink_ncheck.t
+    t/elink_neighbor.t
+    t/elink_neighbor_history.t
+    t/elink_scores.t
+    t/epost.t
+    t/esearch.t
+    t/espell.t
+    t/esummary.t
+    t/release-EUtilities.t
+    t/release-eol.t
+);
+
+for my $path (@files) { eol_unix_ok( $path, { trailing_whitespace => 1 } ) }
+done_testing;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libbio-eutilities-perl.git
More information about the debian-med-commit
mailing list