[med-svn] [ncbi-entrez-direct] 01/08: New upstream version 6.00.20170109+ds

Aaron M. Ucko ucko at moszumanska.debian.org
Wed Jan 25 03:00:00 UTC 2017


This is an automated email from the git hooks/post-receive script.

ucko pushed a commit to branch master
in repository ncbi-entrez-direct.

commit fa7c83c43d10aa6877ef22a951d0891fa32e604e
Author: Aaron M. Ucko <ucko at debian.org>
Date:   Mon Jan 9 22:55:16 2017 -0500

    New upstream version 6.00.20170109+ds
---
 xtract.go | 150 +++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 120 insertions(+), 30 deletions(-)

diff --git a/xtract.go b/xtract.go
index c585133..ef021ee 100644
--- a/xtract.go
+++ b/xtract.go
@@ -193,7 +193,7 @@ Sequence Coordinates
 
   -0-based         Zero-Based
   -1-based         One-Based
-  -ucsc            Half-Open
+  -ucsc-based      Half-Open
 
 Command Generator
 
@@ -220,7 +220,7 @@ Reformatting
 Modification
 
   -filter          Object
-                     [retain|remove|encode|decode|shrink]
+                     [retain|remove|encode|decode|shrink|expand]
                        [content|cdata|comment|object|attributes|container]
 
 Validation
@@ -1546,7 +1546,7 @@ const (
 	DEV
 	ZEROBASED
 	ONEBASED
-	UCSC
+	UCSCBASED
 	ELSE
 	VARIABLE
 	VALUE
@@ -1662,6 +1662,10 @@ var argTypeIs = map[string]ArgumentType{
 	"-1-based":     EXTRACTION,
 	"-one-based":   EXTRACTION,
 	"-ucsc":        EXTRACTION,
+	"-ucsc-based":  EXTRACTION,
+	"-ucsc-coords": EXTRACTION,
+	"-bed-based":   EXTRACTION,
+	"-bed-coords":  EXTRACTION,
 	"-else":        EXTRACTION,
 	"-pfx":         CUSTOMIZATION,
 	"-sfx":         CUSTOMIZATION,
@@ -1729,7 +1733,11 @@ var opTypeIs = map[string]OpType{
 	"-zero-based":  ZEROBASED,
 	"-1-based":     ONEBASED,
 	"-one-based":   ONEBASED,
-	"-ucsc":        UCSC,
+	"-ucsc":        UCSCBASED,
+	"-ucsc-based":  UCSCBASED,
+	"-ucsc-coords": UCSCBASED,
+	"-bed-based":   UCSCBASED,
+	"-bed-coords":  UCSCBASED,
 	"-else":        ELSE,
 }
 
@@ -2573,19 +2581,31 @@ func ParseArguments(args []string, pttrn string) *Block {
 						// literal numeric constant
 						tsk := &Step{Type: status, Value: str}
 						op.Stages = append(op.Stages, tsk)
-					} else if (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') {
+					} else {
 						// numeric test allows element as second argument
-						prnt, match := SplitInTwoAt(str, "/", RIGHT)
-						match, attrib := SplitInTwoAt(match, "@", LEFT)
-						wildcard := false
-						if strings.HasPrefix(prnt, ":") || strings.HasPrefix(match, ":") || strings.HasPrefix(attrib, ":") {
-							wildcard = true
+						orig := str
+						if ch == '#' || ch == '%' || ch == '^' {
+							// check for pound, percent, or caret character at beginning of element (undocumented)
+							str = str[1:]
+							if len(str) < 1 {
+								fmt.Fprintf(os.Stderr, "\nERROR: Unexpected numeric match constraints\n")
+								os.Exit(1)
+							}
+							ch = str[0]
+						}
+						if (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') {
+							prnt, match := SplitInTwoAt(str, "/", RIGHT)
+							match, attrib := SplitInTwoAt(match, "@", LEFT)
+							wildcard := false
+							if strings.HasPrefix(prnt, ":") || strings.HasPrefix(match, ":") || strings.HasPrefix(attrib, ":") {
+								wildcard = true
+							}
+							tsk := &Step{Type: status, Value: orig, Parent: prnt, Match: match, Attrib: attrib, Wild: wildcard}
+							op.Stages = append(op.Stages, tsk)
+						} else {
+							fmt.Fprintf(os.Stderr, "\nERROR: Unexpected numeric match constraints\n")
+							os.Exit(1)
 						}
-						tsk := &Step{Type: status, Value: str, Parent: prnt, Match: match, Attrib: attrib, Wild: wildcard}
-						op.Stages = append(op.Stages, tsk)
-					} else {
-						fmt.Fprintf(os.Stderr, "\nERROR: Unexpected numeric match constraints\n")
-						os.Exit(1)
 					}
 					op = nil
 				} else {
@@ -2650,7 +2670,7 @@ func ParseArguments(args []string, pttrn string) *Block {
 				comm = append(comm, op)
 				status = UNSET
 			case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE:
-			case NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC:
+			case NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSCBASED:
 			case TAB, RET, PFX, SFX, SEP, LBL, PFC, DEF:
 			case UNSET:
 				fmt.Fprintf(os.Stderr, "\nERROR: No -element before '%s'\n", str)
@@ -2738,7 +2758,7 @@ func ParseArguments(args []string, pttrn string) *Block {
 
 				// sequence coordinate adjustments
 				switch status {
-				case ZEROBASED, ONEBASED, UCSC:
+				case ZEROBASED, ONEBASED, UCSCBASED:
 					seq := pttrn + ":"
 					if attrib != "" {
 						seq += "@"
@@ -2765,7 +2785,7 @@ func ParseArguments(args []string, pttrn string) *Block {
 						if seqtype.Based == 0 {
 							status = INC
 						}
-					case UCSC:
+					case UCSCBASED:
 						status = ELEMENT
 						// half-open intervals, start is 0-based, stop is 1-based
 						if seqtype.Based == 0 && seqtype.Which == ISSTOP {
@@ -2800,7 +2820,7 @@ func ParseArguments(args []string, pttrn string) *Block {
 			switch status {
 			case UNSET:
 				status = nextStatus(str)
-			case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC:
+			case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSCBASED:
 				for !strings.HasPrefix(str, "-") {
 					// create one operation per argument, even if under a single -element statement
 					op := &Operation{Type: status, Value: str}
@@ -4107,6 +4127,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 			DOENCODE
 			DODECODE
 			DOSHRINK
+			DOEXPAND
 		)
 
 		action := args[0]
@@ -4123,6 +4144,8 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 			what = DODECODE
 		case "shrink":
 			what = DOSHRINK
+		case "expand":
+			what = DOEXPAND
 		default:
 			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized action '%s' supplied to xtract -filter\n", action)
 			os.Exit(1)
@@ -4151,6 +4174,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 		}
 
 		inPattern := false
+		prevName := ""
 
 		for {
 			tag, name, attr, _, idx := nextToken(Idx)
@@ -4158,6 +4182,7 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 
 			switch tag {
 			case STARTTAG:
+				prevName = name
 				if name == pttrn {
 					inPattern = true
 					if which == CONTAINERTAG && what == DOREMOVE {
@@ -4213,6 +4238,46 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special
 				if inPattern && which == OBJECTTAG && what == DOREMOVE {
 					continue
 				}
+				if inPattern && which == CONTENTTAG && what == DOEXPAND {
+					var words []string
+					if strings.Contains(name, "|") {
+						words = strings.FieldsFunc(name, func(c rune) bool {
+							return c == '|'
+						})
+					} else if strings.Contains(name, ",") {
+						words = strings.FieldsFunc(name, func(c rune) bool {
+							return c == ','
+						})
+					} else {
+						words = strings.Fields(name)
+					}
+					between := ""
+					for _, item := range words {
+						max := len(item)
+						for max > 1 {
+							ch := item[max-1]
+							if ch != '.' && ch != ',' && ch != ':' && ch != ';' {
+								break
+							}
+							// trim trailing punctuation
+							item = item[:max-1]
+							// continue checking for runs of punctuation at end
+							max--
+						}
+						if HasFlankingSpace(item) {
+							item = strings.TrimSpace(item)
+						}
+						if item != "" {
+							if between != "" {
+								buffer.WriteString(between)
+							}
+							buffer.WriteString(item)
+							buffer.WriteString("\n")
+							between = "</" + prevName + ">\n<" + prevName + ">\n"
+						}
+					}
+					continue
+				}
 				if inPattern && which == tag {
 					switch what {
 					case DORETAIN:
@@ -5735,7 +5800,7 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st
 				val := strconv.Itoa(index)
 				acc(val)
 			case INC:
-				// -inc, or component of -0-based, -1-based, or -ucsc
+				// -inc, or component of -0-based, -1-based, or -ucsc-based
 				exploreElements(func(str string, lvl int) {
 					if str != "" {
 						num, err := strconv.Atoi(str)
@@ -5748,7 +5813,7 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st
 					}
 				})
 			case DEC:
-				// -dec, or component of -0-based, -1-based, or -ucsc
+				// -dec, or component of -0-based, -1-based, or -ucsc-based
 				exploreElements(func(str string, lvl int) {
 					if str != "" {
 						num, err := strconv.Atoi(str)
@@ -5818,7 +5883,7 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st
 	between := ""
 
 	switch status {
-	case ELEMENT, ENCODE, UPPER, LOWER, TITLE, VALUE, NUM, INC, DEC, ZEROBASED, ONEBASED, UCSC:
+	case ELEMENT, ENCODE, UPPER, LOWER, TITLE, VALUE, NUM, INC, DEC, ZEROBASED, ONEBASED, UCSCBASED:
 		processElement(func(str string) {
 			if str != "" {
 				ok = true
@@ -6152,7 +6217,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 		str := op.Value
 
 		switch op.Type {
-		case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC:
+		case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSCBASED:
 			txt, ok := ProcessClause(curr, op.Stages, mask, tab, pfx, sfx, sep, def, op.Type, index, level, variables)
 			if ok {
 				tab = col
@@ -6271,14 +6336,39 @@ func ConditionsAreSatisfied(conditions []*Operation, curr *Node, mask string, in
 		case GT, GE, LT, LE, EQ, NE:
 			// second argument of numeric test can be element specifier
 			if constraint.Parent != "" || constraint.Match != "" || constraint.Attrib != "" {
-				ExploreElements(curr, mask, constraint.Parent, constraint.Match, constraint.Attrib, constraint.Wild, level, func(str string, lvl int) {
-					if str != "" {
-						_, errz := strconv.Atoi(str)
-						if errz == nil {
-							val = str
+				ch := val[0]
+				// pound, percent, and caret prefixes supported as potentially useful for data QA (undocumented)
+				switch ch {
+				case '#':
+					count := 0
+					ExploreElements(curr, mask, constraint.Parent, constraint.Match, constraint.Attrib, constraint.Wild, level, func(stn string, lvl int) {
+						count++
+					})
+					val = strconv.Itoa(count)
+				case '%':
+					length := 0
+					ExploreElements(curr, mask, constraint.Parent, constraint.Match, constraint.Attrib, constraint.Wild, level, func(stn string, lvl int) {
+						if stn != "" {
+							length += len(stn)
 						}
-					}
-				})
+					})
+					val = strconv.Itoa(length)
+				case '^':
+					depth := 0
+					ExploreElements(curr, mask, constraint.Parent, constraint.Match, constraint.Attrib, constraint.Wild, level, func(stn string, lvl int) {
+						depth = lvl
+					})
+					val = strconv.Itoa(depth)
+				default:
+					ExploreElements(curr, mask, constraint.Parent, constraint.Match, constraint.Attrib, constraint.Wild, level, func(stn string, lvl int) {
+						if stn != "" {
+							_, errz := strconv.Atoi(stn)
+							if errz == nil {
+								val = stn
+							}
+						}
+					})
+				}
 			}
 
 			// numeric tests on element values

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ncbi-entrez-direct.git



More information about the debian-med-commit mailing list