[med-svn] [Git][med-team/ncbi-entrez-direct][upstream] New upstream version 14.0.20201015+dfsg

Aaron M. Ucko gitlab at salsa.debian.org
Tue Oct 20 02:40:57 BST 2020



Aaron M. Ucko pushed to branch upstream at Debian Med / ncbi-entrez-direct


Commits:
ed0222f2 by Aaron M. Ucko at 2020-10-19T21:23:43-04:00
New upstream version 14.0.20201015+dfsg
- - - - -


3 changed files:

- ecommon.sh
- test-eutils
- xtract.go


Changes:

=====================================
ecommon.sh
=====================================
@@ -598,6 +598,7 @@ RequestWithRetry() {
         ErrorHead "$warn" "$when"
         PrintQuery "$@"
         ErrorTail "EMPTY RESULT" "$whch"
+        sleep 1
         when=$( date )
         # retry query
         res=$( "$@" )
@@ -614,6 +615,7 @@ RequestWithRetry() {
               ref=$( echo "$res" | xtract -pattern ERROR -element "*" )
             fi
             ErrorTail "$ref" "$whch"
+            sleep 1
             when=$( date )
             # retry query
             res=$( "$@" )
@@ -628,6 +630,7 @@ RequestWithRetry() {
               ref=$( echo "$res" | xtract -pattern error -element "*" )
             fi
             ErrorTail "$ref" "$whch"
+            sleep 1
             when=$( date )
             # retry query
             res=$( "$@" )
@@ -649,6 +652,7 @@ RequestWithRetry() {
                        -pattern "ErrorList/*" -pfx "  " -element "*" )
               fi
               ErrorTail "$ref" "$whch"
+              sleep 1
               when=$( date )
               # retry query
               res=$( "$@" )
@@ -659,6 +663,7 @@ RequestWithRetry() {
             ErrorHead "$warn" "$when"
             PrintQuery "$@"
             ErrorTail "$ref" "$whch"
+            sleep 1
             when=$( date )
             # retry query
             res=$( "$@" )


=====================================
test-eutils
=====================================
@@ -146,9 +146,14 @@ DoAlive() {
   for i in $(seq 1 $repeats)
   do
     DoStart
+    size=0
     res=$(
       nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils einfo.fcgi
     )
+    if [ -n "$res" ]
+    then
+      size=${#res}
+    fi
     DoStop
     tst=$(
       echo "$res" | xtract -pattern DbList -sep "\n" -element DbName |
@@ -165,14 +170,23 @@ DoAlive() {
         ;;
     esac
     DoTime
+    if [ "$size" -ne 1341 ]
+    then
+      echo "($size)"
+    fi
   done
 
   for i in $(seq 1 $repeats)
   do
     DoStart
+    size=0
     res=$(
       nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils elink.fcgi -dbfrom pubmed -db pubmed -id 2539356
     )
+    if [ -n "$res" ]
+    then
+      size=${#res}
+    fi
     DoStop
     num=$(
       echo "$res" | tr '\n' ' ' | xtract -pattern LinkSet -num "Link/Id"
@@ -186,14 +200,23 @@ DoAlive() {
       printf "."
     fi
     DoTime
+    if [ "$size" -ne 11750 ]
+    then
+      echo "($size)"
+    fi
   done
 
   for i in $(seq 1 $repeats)
   do
     DoStart
+    size=0
     res=$(
       nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils efetch.fcgi -db pubmed -id 2539356 -rettype native -retmode xml
     )
+    if [ -n "$res" ]
+    then
+      size=${#res}
+    fi
     DoStop
     tst=$(
       echo "$res" | tr '\n' ' '
@@ -209,14 +232,23 @@ DoAlive() {
         ;;
     esac
     DoTime
+    if [ "$size" -ne 21823 ]
+    then
+      echo "($size)"
+    fi
   done
 
   for i in $(seq 1 $repeats)
   do
     DoStart
+    size=0
     res=$(
       nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils esummary.fcgi -db pubmed -id 2539356 -version 2.0
     )
+    if [ -n "$res" ]
+    then
+      size=${#res}
+    fi
     DoStop
     tst=$(
       echo "$res" | tr '\n' ' '
@@ -232,14 +264,23 @@ DoAlive() {
         ;;
     esac
     DoTime
+    if [ "$size" -ne 3005 ]
+    then
+      echo "($size)"
+    fi
   done
 
   for i in $(seq 1 $repeats)
   do
     DoStart
+    size=0
     res=$(
       nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils esearch.fcgi -db pubmed -term "tn3 transposition immunity"
     )
+    if [ -n "$res" ]
+    then
+      size=${#res}
+    fi
     DoStop
     tst=$(
       echo "$res" | tr '\n' ' '
@@ -255,6 +296,10 @@ DoAlive() {
         ;;
     esac
     DoTime
+    if [ "$size" -ne 1589 ]
+    then
+      echo "($size)"
+    fi
   done
   printf "\n"
 }


=====================================
xtract.go
=====================================
@@ -297,7 +297,7 @@ Modification
 
 Efetch Normalization
 
-  -normalize      [database]
+  -normalize       [database]
 
 Validation
 
@@ -327,6 +327,8 @@ Data Conversion
                      [-nest flat|recurse|plural|depth]
 
   -a2x             Convert ASN.1 stream to XML suitable for -path navigation
+                     [-set setWrapper]
+                     [-rec recordWrapper]
 
   -t2x             Convert tab-delimited table to XML
                      [-set setWrapper]
@@ -3431,7 +3433,7 @@ func PrintSubtree(node *Node, style IndentType, printAttrs bool, proc func(strin
 }
 
 // ProcessClause handles comma-separated -element arguments
-func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, def string, status OpType, index, level int, variables map[string]string, transform map[string]string, histogram map[string]int) (string, bool) {
+func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, def string, wrp bool, status OpType, index, level int, variables map[string]string, transform map[string]string, histogram map[string]int) (string, bool) {
 
 	if curr == nil || stages == nil {
 		return "", false
@@ -3527,6 +3529,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 
 				// handle usual situation with no range first
 				if norm {
+					if wrp {
+						str = html.EscapeString(str)
+					}
 					acc(str)
 					return
 				}
@@ -3556,6 +3561,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 						str = str[:idx]
 					}
 					if str != "" {
+						if wrp {
+							str = html.EscapeString(str)
+						}
 						acc(str)
 					}
 					return
@@ -3629,6 +3637,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 					if doUpCase {
 						str = strings.ToUpper(str)
 					}
+					if wrp {
+						str = html.EscapeString(str)
+					}
 					acc(str)
 				} else if max == 0 {
 					if min > 0 && min < len(str) {
@@ -3640,6 +3651,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 							if doUpCase {
 								str = strings.ToUpper(str)
 							}
+							if wrp {
+								str = html.EscapeString(str)
+							}
 							acc(str)
 						}
 					}
@@ -3653,6 +3667,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 							if doUpCase {
 								str = strings.ToUpper(str)
 							}
+							if wrp {
+								str = html.EscapeString(str)
+							}
 							acc(str)
 						}
 					}
@@ -3666,6 +3683,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 							if doUpCase {
 								str = strings.ToUpper(str)
 							}
+							if wrp {
+								str = html.EscapeString(str)
+							}
 							acc(str)
 						}
 					}
@@ -3711,7 +3731,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
 			case ENCODE:
 				exploreElements(func(str string, lvl int) {
 					if str != "" {
-						str = html.EscapeString(str)
+						if !wrp {
+							str = html.EscapeString(str)
+						}
 						sendSlice(str)
 					}
 				})
@@ -4851,6 +4873,8 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 
 	varname := ""
 
+	wrp := false
+
 	plain := true
 	var currColor *color.Color
 
@@ -4873,7 +4897,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 		switch op.Type {
 		case ELEMENT, FIRST, LAST, ENCODE, DECODE, PLAIN, UPPER, LOWER, CHAIN, TITLE, ORDER, YEAR, TRANSLATE, TERMS, WORDS, PAIRS, REVERSE, LETTERS, CLAUSES, INDICES, MESHCODE, MATRIX, ACCENTED,
 			NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, MED, MUL, DIV, MOD, BIN, BIT, ZEROBASED, ONEBASED, UCSCBASED, REVCOMP, NUCLEIC, FASTA, NCBI2NA, NCBI4NA, MOLWT:
-			txt, ok := ProcessClause(curr, op.Stages, mask, tab, pfx, sfx, plg, sep, def, op.Type, index, level, variables, transform, histogram)
+			txt, ok := ProcessClause(curr, op.Stages, mask, tab, pfx, sfx, plg, sep, def, wrp, op.Type, index, level, variables, transform, histogram)
 			if ok {
 				plg = ""
 				lst = elg
@@ -4886,7 +4910,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 				}
 			}
 		case HISTOGRAM:
-			txt, ok := ProcessClause(curr, op.Stages, mask, "", "", "", "", "", "", op.Type, index, level, variables, transform, histogram)
+			txt, ok := ProcessClause(curr, op.Stages, mask, "", "", "", "", "", "", wrp, op.Type, index, level, variables, transform, histogram)
 			if ok {
 				accum(txt)
 			}
@@ -4937,6 +4961,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 				sfx = ""
 				plg = ""
 				elg = ""
+				wrp = false
 				break
 			}
 			lft, rgt := SplitInTwoAt(str, ",", RIGHT)
@@ -4949,6 +4974,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 				sfx = "</" + rgt + ">"
 				sep = "</" + rgt + "><" + rgt + ">"
 			}
+			wrp = true
 		case RST:
 			pfx = ""
 			sfx = ""
@@ -4956,6 +4982,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 			elg = ""
 			sep = "\t"
 			def = ""
+			wrp = false
 		case DEF:
 			def = str
 		case COLOR:
@@ -4999,7 +5026,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
 				// -if "&VARIABLE" will fail if initialized with empty string ""
 				delete(variables, varname)
 			} else {
-				txt, ok := ProcessClause(curr, op.Stages, mask, "", pfx, sfx, plg, sep, def, op.Type, index, level, variables, transform, histogram)
+				txt, ok := ProcessClause(curr, op.Stages, mask, "", pfx, sfx, plg, sep, def, wrp, op.Type, index, level, variables, transform, histogram)
 				if ok {
 					plg = ""
 					lst = elg
@@ -8952,6 +8979,8 @@ func JSONConverter(inp <-chan string, set, rec, nest string) <-chan string {
 			buffer.WriteString(indentSpaces[i])
 		}
 
+		count := 0
+
 		// recursive function definitions
 		var parseObject func(tag string)
 		var parseArray func(tag, pfx string, lvl int)
@@ -9011,6 +9040,17 @@ func JSONConverter(inp <-chan string, set, rec, nest string) <-chan string {
 				buffer.WriteString(tag)
 				buffer.WriteString(">\n")
 			}
+
+			count++
+			if count > 1000 {
+				count = 0
+				txt := buffer.String()
+				if txt != "" {
+					// send current result through output channel
+					out <- txt
+				}
+				buffer.Reset()
+			}
 		}
 
 		parseObject = func(tag string) {
@@ -9084,9 +9124,10 @@ func JSONConverter(inp <-chan string, set, rec, nest string) <-chan string {
 			}
 
 			txt := buffer.String()
-
-			// send result through output channel
-			out <- txt
+			if txt != "" {
+				// send remaining result through output channel
+				out <- txt
+			}
 
 			buffer.Reset()
 
@@ -9401,7 +9442,7 @@ func ASN1Tokenizer(inp io.Reader) <-chan string {
 }
 
 // ASN1Converter parses ASN1 token stream into XML object stream
-func ASN1Converter(inp <-chan string) <-chan string {
+func ASN1Converter(inp <-chan string, set, rec string) <-chan string {
 
 	if inp == nil {
 		return nil
@@ -9492,7 +9533,6 @@ func ASN1Converter(inp <-chan string) <-chan string {
 		count := 0
 
 		indent := 0
-		set := ""
 		if set != "" {
 			indent = 1
 		}
@@ -9564,9 +9604,9 @@ func ASN1Converter(inp <-chan string) <-chan string {
 		}
 
 		// recursive function definition
-		var parseAsnObject func(prnt string)
+		var parseAsnObject func(prnt string, lvl int)
 
-		parseAsnObject = func(prnt string) {
+		parseAsnObject = func(prnt string, lvl int) {
 
 			for {
 				tkn := nextToken()
@@ -9594,7 +9634,7 @@ func ASN1Converter(inp <-chan string) <-chan string {
 						printOpeningTag(trd)
 						tag = trd
 					}
-					parseAsnObject(tag)
+					parseAsnObject(tag, lvl+1)
 					if trd != "" {
 						printClosingTag(trd)
 					}
@@ -9602,6 +9642,9 @@ func ASN1Converter(inp <-chan string) <-chan string {
 						printClosingTag(sec)
 					}
 					printClosingTag(fst)
+					if lvl == 0 {
+						return
+					}
 				case ",":
 					fst, sec, trd := popFromArry()
 					if trd != "" {
@@ -9629,6 +9672,8 @@ func ASN1Converter(inp <-chan string) <-chan string {
 					}
 					return
 				case "::=":
+					fmt.Fprintf(os.Stderr, "\nERROR: Unexpected ::= token found\n")
+					os.Exit(1)
 				default:
 					arry = append(arry, tkn)
 				}
@@ -9646,6 +9691,10 @@ func ASN1Converter(inp <-chan string) <-chan string {
 			}
 		}
 
+		if set != "" {
+			out <- "<" + set + ">"
+		}
+
 		// process stream of catenated top-level ASN1 records
 		for {
 			arry = nil
@@ -9655,6 +9704,10 @@ func ASN1Converter(inp <-chan string) <-chan string {
 				break
 			}
 
+			if rec != "" {
+				top = rec
+			}
+
 			arry = append(arry, top)
 
 			tkn := nextToken()
@@ -9667,17 +9720,22 @@ func ASN1Converter(inp <-chan string) <-chan string {
 				os.Exit(1)
 			}
 
-			parseAsnObject(top)
+			parseAsnObject(top, 0)
 
 			txt := buffer.String()
-
-			// send remaining result through output channel
-			out <- txt
+			if txt != "" {
+				// send remaining result through output channel
+				out <- txt
+			}
 
 			buffer.Reset()
 
 			runtime.Gosched()
 		}
+
+		if set != "" {
+			out <- "</" + set + ">"
+		}
 	}
 
 	// launch single converter goroutine
@@ -10219,7 +10277,8 @@ func GenBankConverter(inp io.Reader) <-chan string {
 
 				rec.WriteString("      <INSDReference>\n")
 
-				str := strings.TrimPrefix(line, "REFERENCE")
+				txt := strings.TrimPrefix(line, "REFERENCE")
+				str := readContinuationLines(txt)
 				str = CompressRunsOfSpaces(str)
 				str = strings.TrimSpace(str)
 				idx := strings.Index(str, "(")
@@ -10236,13 +10295,22 @@ func GenBankConverter(inp io.Reader) <-chan string {
 						writeOneElement("        ", "INSDReference_position", posn)
 
 					} else {
-						cols := strings.Fields(posn)
-						if len(cols) == 4 && cols[2] == "to" {
-
-							writeOneElement("        ", "INSDReference_position", cols[1]+".."+cols[3])
-
+						var arry []string
+						cls := strings.Split(posn, ";")
+						for _, item := range cls {
+							item = strings.TrimPrefix(item, "bases ")
+							item = strings.TrimPrefix(item, "residues ")
+							item = strings.TrimSpace(item)
+							cols := strings.Fields(item)
+							if len(cols) == 3 && cols[1] == "to" {
+								arry = append(arry, cols[0]+".."+cols[2])
+							}
+						}
+						if len(arry) > 0 {
+							posit := strings.Join(arry, ",")
+							writeOneElement("        ", "INSDReference_position", posit)
 						} else {
-							fmt.Fprintf(os.Stderr, "ERROR: "+line+"\n")
+							fmt.Fprintf(os.Stderr, "ERROR: "+posn+"\n")
 						}
 					}
 				} else {
@@ -10250,7 +10318,6 @@ func GenBankConverter(inp io.Reader) <-chan string {
 
 					writeOneElement("        ", "INSDReference_reference", ref)
 				}
-				line = nextLine()
 				row++
 
 				if strings.HasPrefix(line, "  AUTHORS") {
@@ -11295,7 +11362,7 @@ func main() {
 		debug.FreeOSMemory()
 
 		if timr {
-			printDuration("records")
+			printDuration("blocks")
 		}
 
 		return
@@ -11303,13 +11370,51 @@ func main() {
 
 	// ASN.1 TO XML CONVERTER
 
-	if args[0] == "-a2x" || args[0] == "-asn2xml" || args[0] == "-asn2tokens" {
+	if args[0] == "-a2x" || args[0] == "-asn2xml" {
+
+		set := ""
+		rec := ""
+
+		nextArg := func() (string, bool) {
+
+			if len(args) < 1 {
+				return "", false
+			}
+
+			// remove next token from slice
+			nxt := args[0]
+			args = args[1:]
+
+			return nxt, true
+		}
+
+		// look for optional arguments
+		args = args[1:]
+		for {
+			arg, ok := nextArg()
+			if !ok {
+				break
+			}
+
+			switch arg {
+			case "-set":
+				// override set wrapper
+				set, ok = nextArg()
+				if ok && set == "-" {
+					set = ""
+				}
+			case "-rec":
+				// override record wrapper
+				rec, ok = nextArg()
+				if ok && rec == "-" {
+					rec = ""
+				}
+			}
+		}
 
 		atkn := ASN1Tokenizer(in)
 		acnv := atkn
-		if args[0] != "-asn2tokens" {
-			acnv = ASN1Converter(atkn)
-		}
+		acnv = ASN1Converter(atkn, set, rec)
 
 		if atkn == nil || acnv == nil {
 			fmt.Fprintf(os.Stderr, "\nERROR: Unable to create ASN.1 to XML converter\n")
@@ -11336,7 +11441,7 @@ func main() {
 		debug.FreeOSMemory()
 
 		if timr {
-			printDuration("lines")
+			printDuration("blocks")
 		}
 
 		return



View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/commit/ed0222f223cd8c5e8aed20c3ad8cb015a9d6f710

-- 
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/commit/ed0222f223cd8c5e8aed20c3ad8cb015a9d6f710
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201020/2cfc5b83/attachment-0001.html>


More information about the debian-med-commit mailing list