[med-svn] [Git][med-team/ncbi-entrez-direct][master] 4 commits: New upstream version 14.0.20201015+dfsg

Aaron M. Ucko gitlab at salsa.debian.org
Tue Oct 20 02:40:48 BST 2020



Aaron M. Ucko pushed to branch master at Debian Med / ncbi-entrez-direct


Commits:
ed0222f2 by Aaron M. Ucko at 2020-10-19T21:23:43-04:00
New upstream version 14.0.20201015+dfsg
- - - - -
bd1cc01d by Aaron M. Ucko at 2020-10-19T21:24:37-04:00
Merge tag 'upstream/14.0.20201015+dfsg' into master

Upstream version 14.0.20201015(+dfsg).

- - - - -
bb56ab24 by Aaron M. Ucko at 2020-10-19T21:29:12-04:00
debian/man/xtract.1: Update for new release (14.0.20201015[+dfsg]).

-a2x (Data Conversion) now optionally takes -set and/or -rec.

- - - - -
f2e30925 by Aaron M. Ucko at 2020-10-19T21:37:21-04:00
Finalize ncbi-entrez-direct 14.0.20201015+dfsg-1 for unstable.

- - - - -


5 changed files:

- debian/changelog
- debian/man/xtract.1
- ecommon.sh
- test-eutils
- xtract.go


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+ncbi-entrez-direct (14.0.20201015+dfsg-1) unstable; urgency=medium
+
+  * New upstream release.
+  * debian/man/xtract.1: Update for new release.
+
+ -- Aaron M. Ucko <ucko at debian.org>  Mon, 19 Oct 2020 21:37:21 -0400
+
 ncbi-entrez-direct (14.0.20201009+dfsg-1) unstable; urgency=medium
 
   * New upstream release.


=====================================
debian/man/xtract.1
=====================================
@@ -1,4 +1,4 @@
-.TH XTRACT 1 2020-10-12 NCBI "NCBI Entrez Direct User's Manual"
+.TH XTRACT 1 2020-10-19 NCBI "NCBI Entrez Direct User's Manual"
 .SH NAME
 gbf2xml, xtract \- NCBI Entrez Direct XML conversion and transformation tool
 .SH SYNOPSIS
@@ -123,7 +123,7 @@ gbf2xml, xtract \- NCBI Entrez Direct XML conversion and transformation tool
 [\|\fB\-sort\fP\ \fIelement\fP\|]
 [\|\fB\-j2x\fP [\|\fB\-set\fP\ \fItag\fP\|] [\|\fB\-rec\fP\ \fItag\fP\|] \
 [\|\fB\-nest\fP\ \fBflat\fP|\fBrecurse\fP|\fBplural\fP|\fBdepth\fP\|]\|]
-[\|\fB\-a2x\fP\|]
+[\|\fB\-a2x\fP [\|\fB\-set\fP\ \fItag\fP\|] [\|\fB\-rec\fP\ \fItag\fP\|]\|]
 [\|\fB\-t2x\fP [\|\fB\-set\fP\ \fItag\fP\|] [\|\fB\-rec\fP\ \fItag\fP\|] \
 [\|\fB\-skip\fP\ \fIN\fP\|] [\|\fB\-lower\fP|\fB\-upper\fP\|] \
 [\|\fB\-indent\fP|\fB\-flush\fP\|] \fIcolumnName1\fP\ ...\|]
@@ -712,6 +712,14 @@ Nested array naming policy.
 .TP
 \fB\-a2x\fP
 Convert text ASN.1 stream to XML suitable for \fB\-path\fP navigation.
+.PD 0
+.RS
+.IP \fB\-set\fP\ \fItag\fP 10
+Replace set wrapper tag.
+.IP \fB\-rec\fP\ \fItag\fP 10
+Replace record wrapper tag.
+.RE
+.PD
 .TP
 \fB\-t2x\fP, \fB\-c2x\fP
 Convert tab\-delimited table or comma\-separated values file,


=====================================
ecommon.sh
=====================================
@@ -598,6 +598,7 @@ RequestWithRetry() {
         ErrorHead "$warn" "$when"
         PrintQuery "$@"
         ErrorTail "EMPTY RESULT" "$whch"
+        sleep 1
         when=$( date )
         # retry query
         res=$( "$@" )
@@ -614,6 +615,7 @@ RequestWithRetry() {
               ref=$( echo "$res" | xtract -pattern ERROR -element "*" )
             fi
             ErrorTail "$ref" "$whch"
+            sleep 1
             when=$( date )
             # retry query
             res=$( "$@" )
@@ -628,6 +630,7 @@ RequestWithRetry() {
               ref=$( echo "$res" | xtract -pattern error -element "*" )
             fi
             ErrorTail "$ref" "$whch"
+            sleep 1
             when=$( date )
             # retry query
             res=$( "$@" )
@@ -649,6 +652,7 @@ RequestWithRetry() {
                        -pattern "ErrorList/*" -pfx "  " -element "*" )
               fi
               ErrorTail "$ref" "$whch"
+              sleep 1
               when=$( date )
               # retry query
               res=$( "$@" )
@@ -659,6 +663,7 @@ RequestWithRetry() {
             ErrorHead "$warn" "$when"
             PrintQuery "$@"
             ErrorTail "$ref" "$whch"
+            sleep 1
             when=$( date )
             # retry query
             res=$( "$@" )


=====================================
test-eutils
=====================================
@@ -146,9 +146,14 @@ DoAlive() {
   for i in $(seq 1 $repeats)
   do
     DoStart
+    size=0
     res=$(
       nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils einfo.fcgi
     )
+    if [ -n "$res" ]
+    then
+      size=${#res}
+    fi
     DoStop
     tst=$(
       echo "$res" | xtract -pattern DbList -sep "\n" -element DbName |
@@ -165,14 +170,23 @@ DoAlive() {
         ;;
     esac
     DoTime
+    if [ "$size" -ne 1341 ]
+    then
+      echo "($size)"
+    fi
   done
 
   for i in $(seq 1 $repeats)
   do
     DoStart
+    size=0
     res=$(
       nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils elink.fcgi -dbfrom pubmed -db pubmed -id 2539356
     )
+    if [ -n "$res" ]
+    then
+      size=${#res}
+    fi
     DoStop
     num=$(
       echo "$res" | tr '\n' ' ' | xtract -pattern LinkSet -num "Link/Id"
@@ -186,14 +200,23 @@ DoAlive() {
       printf "."
     fi
     DoTime
+    if [ "$size" -ne 11750 ]
+    then
+      echo "($size)"
+    fi
   done
 
   for i in $(seq 1 $repeats)
   do
     DoStart
+    size=0
     res=$(
       nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils efetch.fcgi -db pubmed -id 2539356 -rettype native -retmode xml
     )
+    if [ -n "$res" ]
+    then
+      size=${#res}
+    fi
     DoStop
     tst=$(
       echo "$res" | tr '\n' ' '
@@ -209,14 +232,23 @@ DoAlive() {
         ;;
     esac
     DoTime
+    if [ "$size" -ne 21823 ]
+    then
+      echo "($size)"
+    fi
   done
 
   for i in $(seq 1 $repeats)
   do
     DoStart
+    size=0
     res=$(
       nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils esummary.fcgi -db pubmed -id 2539356 -version 2.0
     )
+    if [ -n "$res" ]
+    then
+      size=${#res}
+    fi
     DoStop
     tst=$(
       echo "$res" | tr '\n' ' '
@@ -232,14 +264,23 @@ DoAlive() {
         ;;
     esac
     DoTime
+    if [ "$size" -ne 3005 ]
+    then
+      echo "($size)"
+    fi
   done
 
   for i in $(seq 1 $repeats)
   do
     DoStart
+    size=0
     res=$(
       nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils esearch.fcgi -db pubmed -term "tn3 transposition immunity"
     )
+    if [ -n "$res" ]
+    then
+      size=${#res}
+    fi
     DoStop
     tst=$(
       echo "$res" | tr '\n' ' '
@@ -255,6 +296,10 @@ DoAlive() {
         ;;
     esac
     DoTime
+    if [ "$size" -ne 1589 ]
+    then
+      echo "($size)"
+    fi
   done
   printf "\n"
 }


=====================================
xtract.go
=====================================
@@ -297,7 +297,7 @@ Modification
 
 Efetch Normalization
 
-  -normalize      [database]
+  -normalize       [database]
 
 Validation
 
@@ -327,6 +327,8 @@ Data Conversion
                      [-nest flat|recurse|plural|depth]
 
   -a2x             Convert ASN.1 stream to XML suitable for -path navigation
+                     [-set setWrapper]
+                     [-rec recordWrapper]
 
   -t2x             Convert tab-delimited table to XML
                      [-set setWrapper]
@@ -3431,7 +3433,7 @@ func PrintSubtree(node *Node, style IndentType, printAttrs bool, proc func(strin
 }
 
 // ProcessClause handles comma-separated -element arguments
-func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, def string, status OpType, index, level int, variables map[string]string, transform map[string]string, histogram map[string]int) (string, bool) {
+func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, def string, wrp bool, status OpType, index, level int, variables map[string]string, transform map[string]string, histogram map[string]int) (string, bool) {
 
 	if curr == nil || stages == nil {
 		return "", false
@@ -3527,6 +3529,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 
 				// handle usual situation with no range first
 				if norm {
+					if wrp {
+						str = html.EscapeString(str)
+					}
 					acc(str)
 					return
 				}
@@ -3556,6 +3561,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 						str = str[:idx]
 					}
 					if str != "" {
+						if wrp {
+							str = html.EscapeString(str)
+						}
 						acc(str)
 					}
 					return
@@ -3629,6 +3637,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 					if doUpCase {
 						str = strings.ToUpper(str)
 					}
+					if wrp {
+						str = html.EscapeString(str)
+					}
 					acc(str)
 				} else if max == 0 {
 					if min > 0 && min < len(str) {
@@ -3640,6 +3651,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 							if doUpCase {
 								str = strings.ToUpper(str)
 							}
+							if wrp {
+								str = html.EscapeString(str)
+							}
 							acc(str)
 						}
 					}
@@ -3653,6 +3667,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 							if doUpCase {
 								str = strings.ToUpper(str)
 							}
+							if wrp {
+								str = html.EscapeString(str)
+							}
 							acc(str)
 						}
 					}
@@ -3666,6 +3683,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 							if doUpCase {
 								str = strings.ToUpper(str)
 							}
+							if wrp {
+								str = html.EscapeString(str)
+							}
 							acc(str)
 						}
 					}
@@ -3711,7 +3731,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 			case ENCODE:
 				exploreElements(func(str string, lvl int) {
 					if str != "" {
-						str = html.EscapeString(str)
+						if !wrp {
+							str = html.EscapeString(str)
+						}
 						sendSlice(str)
 					}
 				})
@@ -4851,6 +4873,8 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 
 	varname := ""
 
+	wrp := false
+
 	plain := true
 	var currColor *color.Color
 
@@ -4873,7 +4897,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 		switch op.Type {
 		case ELEMENT, FIRST, LAST, ENCODE, DECODE, PLAIN, UPPER, LOWER, CHAIN, TITLE, ORDER, YEAR, TRANSLATE, TERMS, WORDS, PAIRS, REVERSE, LETTERS, CLAUSES, INDICES, MESHCODE, MATRIX, ACCENTED,
 			NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, MED, MUL, DIV, MOD, BIN, BIT, ZEROBASED, ONEBASED, UCSCBASED, REVCOMP, NUCLEIC, FASTA, NCBI2NA, NCBI4NA, MOLWT:
-			txt, ok := ProcessClause(curr, op.Stages, mask, tab, pfx, sfx, plg, sep, def, op.Type, index, level, variables, transform, histogram)
+			txt, ok := ProcessClause(curr, op.Stages, mask, tab, pfx, sfx, plg, sep, def, wrp, op.Type, index, level, variables, transform, histogram)
 			if ok {
 				plg = ""
 				lst = elg
@@ -4886,7 +4910,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 				}
 			}
 		case HISTOGRAM:
-			txt, ok := ProcessClause(curr, op.Stages, mask, "", "", "", "", "", "", op.Type, index, level, variables, transform, histogram)
+			txt, ok := ProcessClause(curr, op.Stages, mask, "", "", "", "", "", "", wrp, op.Type, index, level, variables, transform, histogram)
 			if ok {
 				accum(txt)
 			}
@@ -4937,6 +4961,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 				sfx = ""
 				plg = ""
 				elg = ""
+				wrp = false
 				break
 			}
 			lft, rgt := SplitInTwoAt(str, ",", RIGHT)
@@ -4949,6 +4974,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 				sfx = "</" + rgt + ">"
 				sep = "</" + rgt + "><" + rgt + ">"
 			}
+			wrp = true
 		case RST:
 			pfx = ""
 			sfx = ""
@@ -4956,6 +4982,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 			elg = ""
 			sep = "\t"
 			def = ""
+			wrp = false
 		case DEF:
 			def = str
 		case COLOR:
@@ -4999,7 +5026,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 				// -if "&VARIABLE" will fail if initialized with empty string ""
 				delete(variables, varname)
 			} else {
-				txt, ok := ProcessClause(curr, op.Stages, mask, "", pfx, sfx, plg, sep, def, op.Type, index, level, variables, transform, histogram)
+				txt, ok := ProcessClause(curr, op.Stages, mask, "", pfx, sfx, plg, sep, def, wrp, op.Type, index, level, variables, transform, histogram)
 				if ok {
 					plg = ""
 					lst = elg
@@ -8952,6 +8979,8 @@ func JSONConverter(inp <-chan string, set, rec, nest string) <-chan string {
 			buffer.WriteString(indentSpaces[i])
 		}
 
+		count := 0
+
 		// recursive function definitions
 		var parseObject func(tag string)
 		var parseArray func(tag, pfx string, lvl int)
@@ -9011,6 +9040,17 @@ func JSONConverter(inp <-chan string, set, rec, nest string) <-chan string {
 				buffer.WriteString(tag)
 				buffer.WriteString(">\n")
 			}
+
+			count++
+			if count > 1000 {
+				count = 0
+				txt := buffer.String()
+				if txt != "" {
+					// send current result through output channel
+					out <- txt
+				}
+				buffer.Reset()
+			}
 		}
 
 		parseObject = func(tag string) {
@@ -9084,9 +9124,10 @@ func JSONConverter(inp <-chan string, set, rec, nest string) <-chan string {
 			}
 
 			txt := buffer.String()
-
-			// send result through output channel
-			out <- txt
+			if txt != "" {
+				// send remaining result through output channel
+				out <- txt
+			}
 
 			buffer.Reset()
 
@@ -9401,7 +9442,7 @@ func ASN1Tokenizer(inp io.Reader) <-chan string {
 }
 
 // ASN1Converter parses ASN1 token stream into XML object stream
-func ASN1Converter(inp <-chan string) <-chan string {
+func ASN1Converter(inp <-chan string, set, rec string) <-chan string {
 
 	if inp == nil {
 		return nil
@@ -9492,7 +9533,6 @@ func ASN1Converter(inp <-chan string) <-chan string {
 		count := 0
 
 		indent := 0
-		set := ""
 		if set != "" {
 			indent = 1
 		}
@@ -9564,9 +9604,9 @@ func ASN1Converter(inp <-chan string) <-chan string {
 		}
 
 		// recursive function definition
-		var parseAsnObject func(prnt string)
+		var parseAsnObject func(prnt string, lvl int)
 
-		parseAsnObject = func(prnt string) {
+		parseAsnObject = func(prnt string, lvl int) {
 
 			for {
 				tkn := nextToken()
@@ -9594,7 +9634,7 @@ func ASN1Converter(inp <-chan string) <-chan string {
 						printOpeningTag(trd)
 						tag = trd
 					}
-					parseAsnObject(tag)
+					parseAsnObject(tag, lvl+1)
 					if trd != "" {
 						printClosingTag(trd)
 					}
@@ -9602,6 +9642,9 @@ func ASN1Converter(inp <-chan string) <-chan string {
 						printClosingTag(sec)
 					}
 					printClosingTag(fst)
+					if lvl == 0 {
+						return
+					}
 				case ",":
 					fst, sec, trd := popFromArry()
 					if trd != "" {
@@ -9629,6 +9672,8 @@ func ASN1Converter(inp <-chan string) <-chan string {
 					}
 					return
 				case "::=":
+					fmt.Fprintf(os.Stderr, "\nERROR: Unexpected ::= token found\n")
+					os.Exit(1)
 				default:
 					arry = append(arry, tkn)
 				}
@@ -9646,6 +9691,10 @@ func ASN1Converter(inp <-chan string) <-chan string {
 			}
 		}
 
+		if set != "" {
+			out <- "<" + set + ">"
+		}
+
 		// process stream of catenated top-level ASN1 records
 		for {
 			arry = nil
@@ -9655,6 +9704,10 @@ func ASN1Converter(inp <-chan string) <-chan string {
 				break
 			}
 
+			if rec != "" {
+				top = rec
+			}
+
 			arry = append(arry, top)
 
 			tkn := nextToken()
@@ -9667,17 +9720,22 @@ func ASN1Converter(inp <-chan string) <-chan string {
 				os.Exit(1)
 			}
 
-			parseAsnObject(top)
+			parseAsnObject(top, 0)
 
 			txt := buffer.String()
-
-			// send remaining result through output channel
-			out <- txt
+			if txt != "" {
+				// send remaining result through output channel
+				out <- txt
+			}
 
 			buffer.Reset()
 
 			runtime.Gosched()
 		}
+
+		if set != "" {
+			out <- "</" + set + ">"
+		}
 	}
 
 	// launch single converter goroutine
@@ -10219,7 +10277,8 @@ func GenBankConverter(inp io.Reader) <-chan string {
 
 				rec.WriteString("      <INSDReference>\n")
 
-				str := strings.TrimPrefix(line, "REFERENCE")
+				txt := strings.TrimPrefix(line, "REFERENCE")
+				str := readContinuationLines(txt)
 				str = CompressRunsOfSpaces(str)
 				str = strings.TrimSpace(str)
 				idx := strings.Index(str, "(")
@@ -10236,13 +10295,22 @@ func GenBankConverter(inp io.Reader) <-chan string {
 						writeOneElement("        ", "INSDReference_position", posn)
 
 					} else {
-						cols := strings.Fields(posn)
-						if len(cols) == 4 && cols[2] == "to" {
-
-							writeOneElement("        ", "INSDReference_position", cols[1]+".."+cols[3])
-
+						var arry []string
+						cls := strings.Split(posn, ";")
+						for _, item := range cls {
+							item = strings.TrimPrefix(item, "bases ")
+							item = strings.TrimPrefix(item, "residues ")
+							item = strings.TrimSpace(item)
+							cols := strings.Fields(item)
+							if len(cols) == 3 && cols[1] == "to" {
+								arry = append(arry, cols[0]+".."+cols[2])
+							}
+						}
+						if len(arry) > 0 {
+							posit := strings.Join(arry, ",")
+							writeOneElement("        ", "INSDReference_position", posit)
 						} else {
-							fmt.Fprintf(os.Stderr, "ERROR: "+line+"\n")
+							fmt.Fprintf(os.Stderr, "ERROR: "+posn+"\n")
 						}
 					}
 				} else {
@@ -10250,7 +10318,6 @@ func GenBankConverter(inp io.Reader) <-chan string {
 
 					writeOneElement("        ", "INSDReference_reference", ref)
 				}
-				line = nextLine()
 				row++
 
 				if strings.HasPrefix(line, "  AUTHORS") {
@@ -11295,7 +11362,7 @@ func main() {
 		debug.FreeOSMemory()
 
 		if timr {
-			printDuration("records")
+			printDuration("blocks")
 		}
 
 		return
@@ -11303,13 +11370,51 @@ func main() {
 
 	// ASN.1 TO XML CONVERTER
 
-	if args[0] == "-a2x" || args[0] == "-asn2xml" || args[0] == "-asn2tokens" {
+	if args[0] == "-a2x" || args[0] == "-asn2xml" {
+
+		set := ""
+		rec := ""
+
+		nextArg := func() (string, bool) {
+
+			if len(args) < 1 {
+				return "", false
+			}
+
+			// remove next token from slice
+			nxt := args[0]
+			args = args[1:]
+
+			return nxt, true
+		}
+
+		// look for optional arguments
+		args = args[1:]
+		for {
+			arg, ok := nextArg()
+			if !ok {
+				break
+			}
+
+			switch arg {
+			case "-set":
+				// override set wrapper
+				set, ok = nextArg()
+				if ok && set == "-" {
+					set = ""
+				}
+			case "-rec":
+				// override record wrapper
+				rec, ok = nextArg()
+				if ok && rec == "-" {
+					rec = ""
+				}
+			}
+		}
 
 		atkn := ASN1Tokenizer(in)
 		acnv := atkn
-		if args[0] != "-asn2tokens" {
-			acnv = ASN1Converter(atkn)
-		}
+		acnv = ASN1Converter(atkn, set, rec)
 
 		if atkn == nil || acnv == nil {
 			fmt.Fprintf(os.Stderr, "\nERROR: Unable to create ASN.1 to XML converter\n")
@@ -11336,7 +11441,7 @@ func main() {
 		debug.FreeOSMemory()
 
 		if timr {
-			printDuration("lines")
+			printDuration("blocks")
 		}
 
 		return



View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/compare/1a0a45b33d972a7e14864aeabc2f6ad7ec43122c...f2e309255fa5041d1709a57aa0230c54a066fe51

-- 
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/compare/1a0a45b33d972a7e14864aeabc2f6ad7ec43122c...f2e309255fa5041d1709a57aa0230c54a066fe51
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201020/da0b232d/attachment-0001.html>


More information about the debian-med-commit mailing list