[med-svn] [ncbi-entrez-direct] 01/08: New upstream version 6.00.20170109+ds
Aaron M. Ucko
ucko at moszumanska.debian.org
Wed Jan 25 03:00:00 UTC 2017
This is an automated email from the git hooks/post-receive script.
ucko pushed a commit to branch master
in repository ncbi-entrez-direct.
commit fa7c83c43d10aa6877ef22a951d0891fa32e604e
Author: Aaron M. Ucko <ucko at debian.org>
Date: Mon Jan 9 22:55:16 2017 -0500
New upstream version 6.00.20170109+ds
---
xtract.go | 150 +++++++++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 120 insertions(+), 30 deletions(-)
diff --git a/xtract.go b/xtract.go
index c585133..ef021ee 100644
--- a/xtract.go
+++ b/xtract.go
@@ -193,7 +193,7 @@ Sequence Coordinates
-0-based Zero-Based
-1-based One-Based
- -ucsc Half-Open
+ -ucsc-based Half-Open
Command Generator
@@ -220,7 +220,7 @@ Reformatting
Modification
-filter Object
- [retain|remove|encode|decode|shrink]
+ [retain|remove|encode|decode|shrink|expand]
[content|cdata|comment|object|attributes|container]
Validation
@@ -1546,7 +1546,7 @@ const (
DEV
ZEROBASED
ONEBASED
- UCSC
+ UCSCBASED
ELSE
VARIABLE
VALUE
@@ -1662,6 +1662,10 @@ var argTypeIs = map[string]ArgumentType{
"-1-based": EXTRACTION,
"-one-based": EXTRACTION,
"-ucsc": EXTRACTION,
+ "-ucsc-based": EXTRACTION,
+ "-ucsc-coords": EXTRACTION,
+ "-bed-based": EXTRACTION,
+ "-bed-coords": EXTRACTION,
"-else": EXTRACTION,
"-pfx": CUSTOMIZATION,
"-sfx": CUSTOMIZATION,
@@ -1729,7 +1733,11 @@ var opTypeIs = map[string]OpType{
"-zero-based": ZEROBASED,
"-1-based": ONEBASED,
"-one-based": ONEBASED,
- "-ucsc": UCSC,
+ "-ucsc": UCSCBASED,
+ "-ucsc-based": UCSCBASED,
+ "-ucsc-coords": UCSCBASED,
+ "-bed-based": UCSCBASED,
+ "-bed-coords": UCSCBASED,
"-else": ELSE,
}
@@ -2573,19 +2581,31 @@ func ParseArguments(args []string, pttrn string) *Block {
// literal numeric constant
tsk := &Step{Type: status, Value: str}
op.Stages = append(op.Stages, tsk)
- } else if (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') {
+ } else {
// numeric test allows element as second argument
- prnt, match := SplitInTwoAt(str, "/", RIGHT)
- match, attrib := SplitInTwoAt(match, "@", LEFT)
- wildcard := false
- if strings.HasPrefix(prnt, ":") || strings.HasPrefix(match, ":") || strings.HasPrefix(attrib, ":") {
- wildcard = true
+ orig := str
+ if ch == '#' || ch == '%' || ch == '^' {
+ // check for pound, percent, or caret character at beginning of element (undocumented)
+ str = str[1:]
+ if len(str) < 1 {
+ fmt.Fprintf(os.Stderr, "\nERROR: Unexpected numeric match constraints\n")
+ os.Exit(1)
+ }
+ ch = str[0]
+ }
+ if (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') {
+ prnt, match := SplitInTwoAt(str, "/", RIGHT)
+ match, attrib := SplitInTwoAt(match, "@", LEFT)
+ wildcard := false
+ if strings.HasPrefix(prnt, ":") || strings.HasPrefix(match, ":") || strings.HasPrefix(attrib, ":") {
+ wildcard = true
+ }
+ tsk := &Step{Type: status, Value: orig, Parent: prnt, Match: match, Attrib: attrib, Wild: wildcard}
+ op.Stages = append(op.Stages, tsk)
+ } else {
+ fmt.Fprintf(os.Stderr, "\nERROR: Unexpected numeric match constraints\n")
+ os.Exit(1)
}
- tsk := &Step{Type: status, Value: str, Parent: prnt, Match: match, Attrib: attrib, Wild: wildcard}
- op.Stages = append(op.Stages, tsk)
- } else {
- fmt.Fprintf(os.Stderr, "\nERROR: Unexpected numeric match constraints\n")
- os.Exit(1)
}
op = nil
} else {
@@ -2650,7 +2670,7 @@ func ParseArguments(args []string, pttrn string) *Block {
comm = append(comm, op)
status = UNSET
case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE:
- case NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC:
+ case NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSCBASED:
case TAB, RET, PFX, SFX, SEP, LBL, PFC, DEF:
case UNSET:
fmt.Fprintf(os.Stderr, "\nERROR: No -element before '%s'\n", str)
@@ -2738,7 +2758,7 @@ func ParseArguments(args []string, pttrn string) *Block {
// sequence coordinate adjustments
switch status {
- case ZEROBASED, ONEBASED, UCSC:
+ case ZEROBASED, ONEBASED, UCSCBASED:
seq := pttrn + ":"
if attrib != "" {
seq += "@"
@@ -2765,7 +2785,7 @@ func ParseArguments(args []string, pttrn string) *Block {
if seqtype.Based == 0 {
status = INC
}
- case UCSC:
+ case UCSCBASED:
status = ELEMENT
// half-open intervals, start is 0-based, stop is 1-based
if seqtype.Based == 0 && seqtype.Which == ISSTOP {
@@ -2800,7 +2820,7 @@ func ParseArguments(args []string, pttrn string) *Block {
switch status {
case UNSET:
status = nextStatus(str)
- case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC:
+ case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSCBASED:
for !strings.HasPrefix(str, "-") {
// create one operation per argument, even if under a single -element statement
op := &Operation{Type: status, Value: str}
@@ -4107,6 +4127,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
DOENCODE
DODECODE
DOSHRINK
+ DOEXPAND
)
action := args[0]
@@ -4123,6 +4144,8 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
what = DODECODE
case "shrink":
what = DOSHRINK
+ case "expand":
+ what = DOEXPAND
default:
fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized action '%s' supplied to xtract -filter\n", action)
os.Exit(1)
@@ -4151,6 +4174,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
}
inPattern := false
+ prevName := ""
for {
tag, name, attr, _, idx := nextToken(Idx)
@@ -4158,6 +4182,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
switch tag {
case STARTTAG:
+ prevName = name
if name == pttrn {
inPattern = true
if which == CONTAINERTAG && what == DOREMOVE {
@@ -4213,6 +4238,46 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
if inPattern && which == OBJECTTAG && what == DOREMOVE {
continue
}
+ if inPattern && which == CONTENTTAG && what == DOEXPAND {
+ var words []string
+ if strings.Contains(name, "|") {
+ words = strings.FieldsFunc(name, func(c rune) bool {
+ return c == '|'
+ })
+ } else if strings.Contains(name, ",") {
+ words = strings.FieldsFunc(name, func(c rune) bool {
+ return c == ','
+ })
+ } else {
+ words = strings.Fields(name)
+ }
+ between := ""
+ for _, item := range words {
+ max := len(item)
+ for max > 1 {
+ ch := item[max-1]
+ if ch != '.' && ch != ',' && ch != ':' && ch != ';' {
+ break
+ }
+ // trim trailing punctuation
+ item = item[:max-1]
+ // continue checking for runs of punctuation at end
+ max--
+ }
+ if HasFlankingSpace(item) {
+ item = strings.TrimSpace(item)
+ }
+ if item != "" {
+ if between != "" {
+ buffer.WriteString(between)
+ }
+ buffer.WriteString(item)
+ buffer.WriteString("\n")
+ between = "</" + prevName + ">\n<" + prevName + ">\n"
+ }
+ }
+ continue
+ }
if inPattern && which == tag {
switch what {
case DORETAIN:
@@ -5735,7 +5800,7 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st
val := strconv.Itoa(index)
acc(val)
case INC:
- // -inc, or component of -0-based, -1-based, or -ucsc
+ // -inc, or component of -0-based, -1-based, or -ucsc-based
exploreElements(func(str string, lvl int) {
if str != "" {
num, err := strconv.Atoi(str)
@@ -5748,7 +5813,7 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st
}
})
case DEC:
- // -dec, or component of -0-based, -1-based, or -ucsc
+ // -dec, or component of -0-based, -1-based, or -ucsc-based
exploreElements(func(str string, lvl int) {
if str != "" {
num, err := strconv.Atoi(str)
@@ -5818,7 +5883,7 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st
between := ""
switch status {
- case ELEMENT, ENCODE, UPPER, LOWER, TITLE, VALUE, NUM, INC, DEC, ZEROBASED, ONEBASED, UCSC:
+ case ELEMENT, ENCODE, UPPER, LOWER, TITLE, VALUE, NUM, INC, DEC, ZEROBASED, ONEBASED, UCSCBASED:
processElement(func(str string) {
if str != "" {
ok = true
@@ -6152,7 +6217,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
str := op.Value
switch op.Type {
- case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC:
+ case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSCBASED:
txt, ok := ProcessClause(curr, op.Stages, mask, tab, pfx, sfx, sep, def, op.Type, index, level, variables)
if ok {
tab = col
@@ -6271,14 +6336,39 @@ func ConditionsAreSatisfied(conditions []*Operation, curr *Node, mask string, in
case GT, GE, LT, LE, EQ, NE:
// second argument of numeric test can be element specifier
if constraint.Parent != "" || constraint.Match != "" || constraint.Attrib != "" {
- ExploreElements(curr, mask, constraint.Parent, constraint.Match, constraint.Attrib, constraint.Wild, level, func(str string, lvl int) {
- if str != "" {
- _, errz := strconv.Atoi(str)
- if errz == nil {
- val = str
+ ch := val[0]
+ // pound, percent, and caret prefixes supported as potentially useful for data QA (undocumented)
+ switch ch {
+ case '#':
+ count := 0
+ ExploreElements(curr, mask, constraint.Parent, constraint.Match, constraint.Attrib, constraint.Wild, level, func(stn string, lvl int) {
+ count++
+ })
+ val = strconv.Itoa(count)
+ case '%':
+ length := 0
+ ExploreElements(curr, mask, constraint.Parent, constraint.Match, constraint.Attrib, constraint.Wild, level, func(stn string, lvl int) {
+ if stn != "" {
+ length += len(stn)
}
- }
- })
+ })
+ val = strconv.Itoa(length)
+ case '^':
+ depth := 0
+ ExploreElements(curr, mask, constraint.Parent, constraint.Match, constraint.Attrib, constraint.Wild, level, func(stn string, lvl int) {
+ depth = lvl
+ })
+ val = strconv.Itoa(depth)
+ default:
+ ExploreElements(curr, mask, constraint.Parent, constraint.Match, constraint.Attrib, constraint.Wild, level, func(stn string, lvl int) {
+ if stn != "" {
+ _, errz := strconv.Atoi(stn)
+ if errz == nil {
+ val = stn
+ }
+ }
+ })
+ }
}
// numeric tests on element values
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ncbi-entrez-direct.git
More information about the debian-med-commit mailing list