[med-svn] [ncbi-entrez-direct] 01/06: New upstream version 6.00.20170105+ds
Aaron M. Ucko
ucko at moszumanska.debian.org
Fri Jan 6 03:35:16 UTC 2017
This is an automated email from the git hooks/post-receive script.
ucko pushed a commit to branch master
in repository ncbi-entrez-direct.
commit 5d2c52c50c69fa0ea9b318fb3619ac9fe8b00d73
Author: Aaron M. Ucko <ucko at debian.org>
Date: Thu Jan 5 22:12:51 2017 -0500
New upstream version 6.00.20170105+ds
---
edirect.pl | 27 +++++-
nquire | 2 +
xtract.go | 304 ++++++++++++++++++++++++++++++++++++-------------------------
3 files changed, 206 insertions(+), 127 deletions(-)
diff --git a/edirect.pl b/edirect.pl
index 712fb5e..9bf7d77 100755
--- a/edirect.pl
+++ b/edirect.pl
@@ -87,7 +87,7 @@ use constant true => 1;
# EDirect version number
-$version = "5.90";
+$version = "6.00";
# URL address components
@@ -127,6 +127,7 @@ sub clearflags {
$emaddr = "";
$email = "";
$err = "";
+ $extend = -1;
$extrafeat = -1;
$field = "";
$fields = false;
@@ -1748,6 +1749,11 @@ sub esmry {
$data = fix_bad_encoding($dbase, $data);
+ # remove eSummaryResult wrapper
+ $data =~ s/<!DOCTYPE eSummaryResult PUBLIC/<!DOCTYPE DocumentSummarySet PUBLIC/g;
+ $data =~ s/<eSummaryResult>//g;
+ $data =~ s/<\/eSummaryResult>//g;
+
print "$data";
}
@@ -1862,6 +1868,11 @@ sub esmry {
$data = fix_bad_encoding($dbase, $data);
+ # remove eSummaryResult wrapper
+ $data =~ s/<!DOCTYPE eSummaryResult PUBLIC/<!DOCTYPE DocumentSummarySet PUBLIC/g;
+ $data =~ s/<eSummaryResult>//g;
+ $data =~ s/<\/eSummaryResult>//g;
+
print "$data";
}
@@ -1887,13 +1898,18 @@ Sequence Range
-seq_start First sequence position to retrieve
-seq_stop Last sequence position to retrieve
-strand Strand of DNA to retrieve
- -complexity 0 = default, 1 = bioseq, 3 = nuc-prot set
Gene Range
-chr_start Sequence range from 0-based coordinates
-chr_stop in gene docsum GenomicInfoType object
+Miscellaneous
+
+ -complexity 0 = default, 1 = bioseq, 3 = nuc-prot set
+ -extend Extend sequence retrieval in both directions
+ -extrafeat Bit flag specifying extra features
+
Format Examples
-db -format -mode Report Type
@@ -2035,6 +2051,7 @@ sub eftch {
"complexity=i" => \$complexity,
"chr_start=i" => \$chr_start,
"chr_stop=i" => \$chr_stop,
+ "extend=i" => \$extend,
"extrafeat=i" => \$extrafeat,
"start=i" => \$min,
"stop=i" => \$max,
@@ -2322,6 +2339,12 @@ sub eftch {
}
}
+ # optionally extend retrieved sequence range in both directions
+ if ( $extend > 0 ) {
+ $seq_start -= $extend;
+ $seq_stop += $extend;
+ }
+
if ( $strand ne "" ) {
$arg .= "&strand=$strand";
}
diff --git a/nquire b/nquire
index 6cab866..68777b0 100755
--- a/nquire
+++ b/nquire
@@ -240,6 +240,8 @@ Examples
nquire -eutils efetch.fcgi -db pubmed -id 2539356 -rettype medline -retmode text
+ nquire -eutils esummary.fcgi -db pubmed -id 2539356 -version 2.0
+
nquire -url "https://eutils.ncbi.nlm.nih.gov/entrez/eutils" elink.fcgi \\
-dbfrom protein -db protein -cmd neighbor -linkname protein_protein -id NP_476532.1
diff --git a/xtract.go b/xtract.go
index ee09786..c585133 100644
--- a/xtract.go
+++ b/xtract.go
@@ -63,7 +63,7 @@ import (
// VERSION AND HELP MESSAGE TEXT
-const xtractVersion = "5.90"
+const xtractVersion = "6.00"
const xtractHelp = `
Overview
@@ -141,9 +141,6 @@ Element Selection
-element Print all items that match tag name
-first Only print value of first item
-last Only print value of last item
- -encode URL-encode <, >, &, ", and ' characters
- -upper Convert text to upper-case
- -lower Convert text to lower-case
-NAME Record value in named variable
-element Constructs
@@ -156,9 +153,14 @@ Element Selection
Object Count "#Author"
Item Length "%Title"
Element Depth "^PMID"
+ Variable "&NAME"
+
+Special -element Operations
+
Parent Index "+"
XML Subtree "*"
- Variable "&NAME"
+ Children "$"
+ Attributes "@"
Numeric Processing
@@ -173,6 +175,13 @@ Numeric Processing
-avg Average
-dev Deviation
+String Processing
+
+ -encode URL-encode <, >, &, ", and ' characters
+ -upper Convert text to upper-case
+ -lower Convert text to lower-case
+ -title Capitalize initial letters of words
+
Phrase Processing
-terms Partition phrase at spaces
@@ -193,7 +202,7 @@ Command Generator
-insd Argument Order
Descriptors INSDSeq_sequence INSDSeq_definition INSDSeq_division
- Flags complete or partial [optional]
+ Flags [complete|partial]
Feature(s) CDS,mRNA
Qualifiers INSDFeature_key "#INSDInterval" gene product
@@ -212,7 +221,7 @@ Modification
-filter Object
[retain|remove|encode|decode|shrink]
- [content|cdata|comment|object|attributes]
+ [content|cdata|comment|object|attributes|container]
Validation
@@ -423,34 +432,34 @@ Peptide Sequences
xtract -insd complete mat_peptide "%peptide" product peptide |
grep -i conotoxin | sort -t $'\t' -u -k 2,2n | head -n 8
- ADB43131.1 15 conotoxin Cal 1b LCCKRHHGCHPCGRT
- AIC77099.1 16 conotoxin Im1.2 GCCSHPACNVNNPHIC
- AIC77105.1 17 conotoxin Lt1.4 GCCSHPACDVNNPDICG
- AIC77103.1 18 conotoxin Lt1.2 PRCCSNPACNANHAEICG
- AIC77083.1 20 conotoxin Bt14.6 KDCTYCMHSSCSMMYEKCRP
- AIC77085.1 21 conotoxin Bt14.8 NECDNCMRSFCSMIYEKCRLK
- AIC77093.1 22 conotoxin Bt14.16 GDCKPCMHPDCRFNPGRCRPRE
- AIC77154.1 23 conotoxin Bt14.19 VREKDCPPHPVPGMHKCVCLKTC
+ ADB43131.1 15 conotoxin Cal 1b LCCKRHHGCHPCGRT
+ ADB43128.1 16 conotoxin Cal 5.1 DPAPCCQHPIETCCRR
+ AIC77105.1 17 conotoxin Lt1.4 GCCSHPACDVNNPDICG
+ ADB43129.1 18 conotoxin Cal 5.2 MIQRSQCCAVKKNCCHVG
+ ADD97803.1 20 conotoxin Cal 1.2 AGCCPTIMYKTGACRTNRCR
+ AIC77085.1 21 conotoxin Bt14.8 NECDNCMRSFCSMIYEKCRLK
+ ADB43125.1 22 conotoxin Cal 14.2 GCPADCPNTCDSSNKCSPGFPG
+ AIC77154.1 23 conotoxin Bt14.19 VREKDCPPHPVPGMHKCVCLKTC
Chromosome Locations
esearch -db gene -query "calmodulin [PFN] AND mammalia [ORGN]" |
efetch -format docsum |
- xtract -pattern DocumentSummary -MAP "(-)" -MAP MapLocation \
- -element Id Name "&MAP" ScientificName
-
- 801 CALM1 14q32.11 Homo sapiens
- 808 CALM3 19q13.2-q13.3 Homo sapiens
- 805 CALM2 2p21 Homo sapiens
- 24242 Calm1 6q31-q32 Rattus norvegicus
- 12313 Calm1 12 E Mus musculus
- 326597 CALM - Bos taurus
- 50663 Calm2 6q11-q12 Rattus norvegicus
- 24244 Calm3 1q22 Rattus norvegicus
- 12315 Calm3 7 9.15 cM Mus musculus
- 12314 Calm2 17 E4 Mus musculus
- 617095 CALM1 - Bos taurus
- 396838 CALM3 6 Sus scrofa
+ xtract -pattern DocumentSummary \
+ -def "-" -element Id Name MapLocation ScientificName
+
+ 801 CALM1 14q32.11 Homo sapiens
+ 808 CALM3 19q13.32 Homo sapiens
+ 805 CALM2 2p21 Homo sapiens
+ 24242 Calm1 6q32 Rattus norvegicus
+ 12313 Calm1 12 E Mus musculus
+ 326597 CALM - Bos taurus
+ 50663 Calm2 6q12 Rattus norvegicus
+ 24244 Calm3 1q21 Rattus norvegicus
+ 12315 Calm3 7 9.15 cM Mus musculus
+ 12314 Calm2 17 E4 Mus musculus
+ 617095 CALM1 - Bos taurus
+ 396838 CALM3 6 Sus scrofa
...
Gene Regions
@@ -671,20 +680,17 @@ Genome Range
xtract -pattern DocumentSummary -NAME Name -DESC Description \
-block GenomicInfoType -if ChrLoc -equals Y \
-min ChrStart,ChrStop -element "&NAME" "&DESC" |
- sort -k 1,1n | cut -f 2- |
+ sort -k 1,1n | cut -f 2- | grep -v uncharacterized |
between-two-genes ASMT IL3RA
- IL3RA interleukin 3 receptor subunit alpha
- LOC101928032 uncharacterized LOC101928032
- LOC101928055 uncharacterized LOC101928055
- SLC25A6 solute carrier family 25 member 6
- LOC105373102 uncharacterized LOC105373102
- LINC00106 long intergenic non-protein coding RNA 106
- ASMTL-AS1 ASMTL antisense RNA 1
- ASMTL acetylserotonin O-methyltransferase-like
- P2RY8 purinergic receptor P2Y8
- AKAP17A A-kinase anchoring protein 17A
- ASMT acetylserotonin O-methyltransferase
+ IL3RA interleukin 3 receptor subunit alpha
+ SLC25A6 solute carrier family 25 member 6
+ LINC00106 long intergenic non-protein coding RNA 106
+ ASMTL-AS1 ASMTL antisense RNA 1
+ ASMTL acetylserotonin O-methyltransferase-like
+ P2RY8 purinergic receptor P2Y8
+ AKAP17A A-kinase anchoring protein 17A
+ ASMT acetylserotonin O-methyltransferase
Amino Acid Substitutions
@@ -1481,6 +1487,7 @@ const (
CDATATAG
COMMENTTAG
OBJECTTAG
+ CONTAINERTAG
ISCLOSED
)
@@ -1494,6 +1501,7 @@ const (
ENCODE
UPPER
LOWER
+ TITLE
TERMS
WORDS
PAIRS
@@ -1543,6 +1551,8 @@ const (
VARIABLE
VALUE
STAR
+ DOLLAR
+ ATSIGN
COUNT
LENGTH
DEPTH
@@ -1632,6 +1642,7 @@ var argTypeIs = map[string]ArgumentType{
"-encode": EXTRACTION,
"-upper": EXTRACTION,
"-lower": EXTRACTION,
+ "-title": EXTRACTION,
"-terms": EXTRACTION,
"-words": EXTRACTION,
"-pairs": EXTRACTION,
@@ -1671,6 +1682,7 @@ var opTypeIs = map[string]OpType{
"-encode": ENCODE,
"-upper": UPPER,
"-lower": LOWER,
+ "-title": TITLE,
"-terms": TERMS,
"-words": WORDS,
"-pairs": PAIRS,
@@ -2637,7 +2649,7 @@ func ParseArguments(args []string, pttrn string) *Block {
op := &Operation{Type: status, Value: ""}
comm = append(comm, op)
status = UNSET
- case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TERMS, WORDS, PAIRS, PHRASE:
+ case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE:
case NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC:
case TAB, RET, PFX, SFX, SEP, LBL, PFC, DEF:
case UNSET:
@@ -2699,10 +2711,18 @@ func ParseArguments(args []string, pttrn string) *Block {
status = STAR
default:
}
- } else if item == "*" {
- status = STAR
- } else if item == "+" {
- status = INDEX
+ } else {
+ switch item {
+ case "*":
+ status = STAR
+ case "+":
+ status = INDEX
+ case "$":
+ status = DOLLAR
+ case "@":
+ status = ATSIGN
+ default:
+ }
}
// parse parent/element at attribute construct
@@ -2780,7 +2800,7 @@ func ParseArguments(args []string, pttrn string) *Block {
switch status {
case UNSET:
status = nextStatus(str)
- case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC:
+ case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC:
for !strings.HasPrefix(str, "-") {
// create one operation per argument, even if under a single -element statement
op := &Operation{Type: status, Value: str}
@@ -3000,7 +3020,7 @@ func (rdr *XMLReader) NextBlock() string {
rdr.Remainder = ""
if m > 16384 {
// previous remainder is larger than reserved section, write and signal need to continue reading
- return string(rdr.Buffer[:]), true, false
+ return string(rdr.Buffer[:m]), true, false
}
// read next block, append behind copied remainder from previous read
@@ -4123,6 +4143,8 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
case "object":
// object normally retained
which = OBJECTTAG
+ case "container":
+ which = CONTAINERTAG
default:
fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized target '%s' supplied to xtract -filter\n", trget)
os.Exit(1)
@@ -4138,6 +4160,9 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
case STARTTAG:
if name == pttrn {
inPattern = true
+ if which == CONTAINERTAG && what == DOREMOVE {
+ continue
+ }
}
if inPattern && which == OBJECTTAG && what == DOREMOVE {
continue
@@ -4174,6 +4199,9 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
if which == OBJECTTAG && what == DOREMOVE {
continue
}
+ if which == CONTAINERTAG && what == DOREMOVE {
+ continue
+ }
}
if inPattern && which == OBJECTTAG && what == DOREMOVE {
continue
@@ -5274,6 +5302,70 @@ func ProcessHydra(isPipe bool) []string {
// COLLECT AND FORMAT REQUESTED XML VALUES
+// ParseAttributes is only run if attribute values are requested in element statements
+func ParseAttributes(attrb string) []string {
+
+ if attrb == "" {
+ return nil
+ }
+
+ attlen := len(attrb)
+
+ // count equal signs
+ num := 0
+ for i := 0; i < attlen; i++ {
+ if attrb[i] == '=' {
+ num += 2
+ }
+ }
+ if num < 1 {
+ return nil
+ }
+
+ // allocate array of proper size
+ arry := make([]string, num)
+ if arry == nil {
+ return nil
+ }
+
+ start := 0
+ idx := 0
+ itm := 0
+
+ // place tag and value in successive array slots
+ for idx < attlen && itm < num {
+ ch := attrb[idx]
+ if ch == '=' {
+ // skip past possible leading blanks
+ for start < attlen {
+ ch = attrb[start]
+ if ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r' || ch == '\f' {
+ start++
+ } else {
+ break
+ }
+ }
+ // =
+ arry[itm] = attrb[start:idx]
+ itm++
+ // skip past equal sign and leading double quote
+ idx += 2
+ start = idx
+ } else if ch == '"' {
+ // "
+ arry[itm] = attrb[start:idx]
+ itm++
+ // skip past trailing double quote and (possible) space
+ idx += 2
+ start = idx
+ } else {
+ idx++
+ }
+ }
+
+ return arry
+}
+
// ExploreElements returns matching element values to callback
func ExploreElements(curr *Node, mask, prnt, match, attrib string, wildcard bool, level int, proc func(string, int)) {
@@ -5298,70 +5390,6 @@ func ExploreElements(curr *Node, mask, prnt, match, attrib string, wildcard bool
return
}
- // parseAttributes is only run if attribute values are requested in element statements
- parseAttributes := func(attrb string) []string {
-
- if attrb == "" {
- return nil
- }
-
- attlen := len(attrb)
-
- // count equal signs
- num := 0
- for i := 0; i < attlen; i++ {
- if attrb[i] == '=' {
- num += 2
- }
- }
- if num < 1 {
- return nil
- }
-
- // allocate array of proper size
- arry := make([]string, num)
- if arry == nil {
- return nil
- }
-
- start := 0
- idx := 0
- itm := 0
-
- // place tag and value in successive array slots
- for idx < attlen && itm < num {
- ch := attrb[idx]
- if ch == '=' {
- // skip past possible leading blanks
- for start < attlen {
- ch = attrb[start]
- if ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r' || ch == '\f' {
- start++
- } else {
- break
- }
- }
- // =
- arry[itm] = attrb[start:idx]
- itm++
- // skip past equal sign and leading double quote
- idx += 2
- start = idx
- } else if ch == '"' {
- // "
- arry[itm] = attrb[start:idx]
- itm++
- // skip past trailing double quote and (possible) space
- idx += 2
- start = idx
- } else {
- idx++
- }
- }
-
- return arry
- }
-
// wildcard matches any namespace prefix
if curr.Name == match ||
(wildcard && strings.HasPrefix(match, ":") && strings.HasSuffix(curr.Name, match)) ||
@@ -5374,7 +5402,7 @@ func ExploreElements(curr *Node, mask, prnt, match, attrib string, wildcard bool
if attrib != "" {
if curr.Attributes != "" && curr.Attribs == nil {
// parse attributes on-the-fly if queried
- curr.Attribs = parseAttributes(curr.Attributes)
+ curr.Attribs = ParseAttributes(curr.Attributes)
}
for i := 0; i < len(curr.Attribs)-1; i += 2 {
// attributes now parsed into array as [ tag, value, tag, value, tag, value, ... ]
@@ -5402,6 +5430,12 @@ func ExploreElements(curr *Node, mask, prnt, match, attrib string, wildcard bool
// for XML container object, send empty string to callback to increment count
proc("", level)
// and continue exploring
+
+ } else if curr.Attributes != "" {
+
+ // for self-closing object, indicate presence by sending empty string to callback
+ proc("", level)
+ return
}
}
}
@@ -5656,6 +5690,14 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st
acc(str)
}
})
+ case TITLE:
+ exploreElements(func(str string, lvl int) {
+ if str != "" {
+ str = strings.ToLower(str)
+ str = strings.Title(str)
+ acc(str)
+ }
+ })
case VARIABLE:
// use value of stored variable
val, ok := variables[match]
@@ -5750,6 +5792,17 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st
if txt != "" {
acc(txt)
}
+ case DOLLAR:
+ for chld := curr.Children; chld != nil; chld = chld.Next {
+ acc(chld.Name)
+ }
+ case ATSIGN:
+ if curr.Attributes != "" && curr.Attribs == nil {
+ curr.Attribs = ParseAttributes(curr.Attributes)
+ }
+ for i := 0; i < len(curr.Attribs)-1; i += 2 {
+ acc(curr.Attribs[i])
+ }
default:
}
}
@@ -5765,7 +5818,7 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st
between := ""
switch status {
- case ELEMENT, ENCODE, UPPER, LOWER, VALUE, NUM, INC, DEC, ZEROBASED, ONEBASED, UCSC:
+ case ELEMENT, ENCODE, UPPER, LOWER, TITLE, VALUE, NUM, INC, DEC, ZEROBASED, ONEBASED, UCSC:
processElement(func(str string) {
if str != "" {
ok = true
@@ -6099,7 +6152,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
str := op.Value
switch op.Type {
- case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC:
+ case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC:
txt, ok := ProcessClause(curr, op.Stages, mask, tab, pfx, sfx, sep, def, op.Type, index, level, variables)
if ok {
tab = col
@@ -7461,15 +7514,6 @@ func main() {
fmt.Fprintf(os.Stderr, "\nERROR: Input data from both stdin and file '%s', mode is '%s'\n", fileName, mode)
os.Exit(1)
}
-
- } else if runtime.GOOS != "windows" {
-
- fromStdin := bool((fi.Mode() & os.ModeCharDevice) == 0)
- if !isPipe || !fromStdin {
- mode := fi.Mode().String()
- fmt.Fprintf(os.Stderr, "\nERROR: No data supplied to xtract from stdin or file, mode is '%s'\n", mode)
- os.Exit(1)
- }
}
// check for -input command after extraction arguments
@@ -7665,6 +7709,16 @@ func main() {
// CONFIRM INPUT DATA AVAILABILITY AFTER RUNNING COMMAND GENERATORS
+ if fileName == "" && runtime.GOOS != "windows" {
+
+ fromStdin := bool((fi.Mode() & os.ModeCharDevice) == 0)
+ if !isPipe || !fromStdin {
+ mode := fi.Mode().String()
+ fmt.Fprintf(os.Stderr, "\nERROR: No data supplied to xtract from stdin or file, mode is '%s'\n", mode)
+ os.Exit(1)
+ }
+ }
+
if testCount < 1 && !usingFile && !isPipe {
fmt.Fprintf(os.Stderr, "\nERROR: No XML input data supplied to xtract\n")
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ncbi-entrez-direct.git
More information about the debian-med-commit
mailing list