[med-svn] [Git][med-team/ncbi-entrez-direct][master] 4 commits: New upstream version 14.0.20201015+dfsg
Aaron M. Ucko
gitlab at salsa.debian.org
Tue Oct 20 02:40:48 BST 2020
Aaron M. Ucko pushed to branch master at Debian Med / ncbi-entrez-direct
Commits:
ed0222f2 by Aaron M. Ucko at 2020-10-19T21:23:43-04:00
New upstream version 14.0.20201015+dfsg
- - - - -
bd1cc01d by Aaron M. Ucko at 2020-10-19T21:24:37-04:00
Merge tag 'upstream/14.0.20201015+dfsg' into master
Upstream version 14.0.20201015(+dfsg).
- - - - -
bb56ab24 by Aaron M. Ucko at 2020-10-19T21:29:12-04:00
debian/man/xtract.1: Update for new release (14.0.20201015[+dfsg]).
-a2x (Data Conversion) now optionally takes -set and/or -rec.
- - - - -
f2e30925 by Aaron M. Ucko at 2020-10-19T21:37:21-04:00
Finalize ncbi-entrez-direct 14.0.20201015+dfsg-1 for unstable.
- - - - -
5 changed files:
- debian/changelog
- debian/man/xtract.1
- ecommon.sh
- test-eutils
- xtract.go
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+ncbi-entrez-direct (14.0.20201015+dfsg-1) unstable; urgency=medium
+
+ * New upstream release.
+ * debian/man/xtract.1: Update for new release.
+
+ -- Aaron M. Ucko <ucko at debian.org> Mon, 19 Oct 2020 21:37:21 -0400
+
ncbi-entrez-direct (14.0.20201009+dfsg-1) unstable; urgency=medium
* New upstream release.
=====================================
debian/man/xtract.1
=====================================
@@ -1,4 +1,4 @@
-.TH XTRACT 1 2020-10-12 NCBI "NCBI Entrez Direct User's Manual"
+.TH XTRACT 1 2020-10-19 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
gbf2xml, xtract \- NCBI Entrez Direct XML conversion and transformation tool
.SH SYNOPSIS
@@ -123,7 +123,7 @@ gbf2xml, xtract \- NCBI Entrez Direct XML conversion and transformation tool
[\|\fB\-sort\fP\ \fIelement\fP\|]
[\|\fB\-j2x\fP [\|\fB\-set\fP\ \fItag\fP\|] [\|\fB\-rec\fP\ \fItag\fP\|] \
[\|\fB\-nest\fP\ \fBflat\fP|\fBrecurse\fP|\fBplural\fP|\fBdepth\fP\|]\|]
-[\|\fB\-a2x\fP\|]
+[\|\fB\-a2x\fP [\|\fB\-set\fP\ \fItag\fP\|] [\|\fB\-rec\fP\ \fItag\fP\|]\|]
[\|\fB\-t2x\fP [\|\fB\-set\fP\ \fItag\fP\|] [\|\fB\-rec\fP\ \fItag\fP\|] \
[\|\fB\-skip\fP\ \fIN\fP\|] [\|\fB\-lower\fP|\fB\-upper\fP\|] \
[\|\fB\-indent\fP|\fB\-flush\fP\|] \fIcolumnName1\fP\ ...\|]
@@ -712,6 +712,14 @@ Nested array naming policy.
.TP
\fB\-a2x\fP
Convert text ASN.1 stream to XML suitable for \fB\-path\fP navigation.
+.PD 0
+.RS
+.IP \fB\-set\fP\ \fItag\fP 10
+Replace set wrapper tag.
+.IP \fB\-rec\fP\ \fItag\fP 10
+Replace record wrapper tag.
+.RE
+.PD
.TP
\fB\-t2x\fP, \fB\-c2x\fP
Convert tab\-delimited table or comma\-separated values file,
=====================================
ecommon.sh
=====================================
@@ -598,6 +598,7 @@ RequestWithRetry() {
ErrorHead "$warn" "$when"
PrintQuery "$@"
ErrorTail "EMPTY RESULT" "$whch"
+ sleep 1
when=$( date )
# retry query
res=$( "$@" )
@@ -614,6 +615,7 @@ RequestWithRetry() {
ref=$( echo "$res" | xtract -pattern ERROR -element "*" )
fi
ErrorTail "$ref" "$whch"
+ sleep 1
when=$( date )
# retry query
res=$( "$@" )
@@ -628,6 +630,7 @@ RequestWithRetry() {
ref=$( echo "$res" | xtract -pattern error -element "*" )
fi
ErrorTail "$ref" "$whch"
+ sleep 1
when=$( date )
# retry query
res=$( "$@" )
@@ -649,6 +652,7 @@ RequestWithRetry() {
-pattern "ErrorList/*" -pfx " " -element "*" )
fi
ErrorTail "$ref" "$whch"
+ sleep 1
when=$( date )
# retry query
res=$( "$@" )
@@ -659,6 +663,7 @@ RequestWithRetry() {
ErrorHead "$warn" "$when"
PrintQuery "$@"
ErrorTail "$ref" "$whch"
+ sleep 1
when=$( date )
# retry query
res=$( "$@" )
=====================================
test-eutils
=====================================
@@ -146,9 +146,14 @@ DoAlive() {
for i in $(seq 1 $repeats)
do
DoStart
+ size=0
res=$(
nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils einfo.fcgi
)
+ if [ -n "$res" ]
+ then
+ size=${#res}
+ fi
DoStop
tst=$(
echo "$res" | xtract -pattern DbList -sep "\n" -element DbName |
@@ -165,14 +170,23 @@ DoAlive() {
;;
esac
DoTime
+ if [ "$size" -ne 1341 ]
+ then
+ echo "($size)"
+ fi
done
for i in $(seq 1 $repeats)
do
DoStart
+ size=0
res=$(
nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils elink.fcgi -dbfrom pubmed -db pubmed -id 2539356
)
+ if [ -n "$res" ]
+ then
+ size=${#res}
+ fi
DoStop
num=$(
echo "$res" | tr '\n' ' ' | xtract -pattern LinkSet -num "Link/Id"
@@ -186,14 +200,23 @@ DoAlive() {
printf "."
fi
DoTime
+ if [ "$size" -ne 11750 ]
+ then
+ echo "($size)"
+ fi
done
for i in $(seq 1 $repeats)
do
DoStart
+ size=0
res=$(
nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils efetch.fcgi -db pubmed -id 2539356 -rettype native -retmode xml
)
+ if [ -n "$res" ]
+ then
+ size=${#res}
+ fi
DoStop
tst=$(
echo "$res" | tr '\n' ' '
@@ -209,14 +232,23 @@ DoAlive() {
;;
esac
DoTime
+ if [ "$size" -ne 21823 ]
+ then
+ echo "($size)"
+ fi
done
for i in $(seq 1 $repeats)
do
DoStart
+ size=0
res=$(
nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils esummary.fcgi -db pubmed -id 2539356 -version 2.0
)
+ if [ -n "$res" ]
+ then
+ size=${#res}
+ fi
DoStop
tst=$(
echo "$res" | tr '\n' ' '
@@ -232,14 +264,23 @@ DoAlive() {
;;
esac
DoTime
+ if [ "$size" -ne 3005 ]
+ then
+ echo "($size)"
+ fi
done
for i in $(seq 1 $repeats)
do
DoStart
+ size=0
res=$(
nquire -get https://eutils.ncbi.nlm.nih.gov/entrez/eutils esearch.fcgi -db pubmed -term "tn3 transposition immunity"
)
+ if [ -n "$res" ]
+ then
+ size=${#res}
+ fi
DoStop
tst=$(
echo "$res" | tr '\n' ' '
@@ -255,6 +296,10 @@ DoAlive() {
;;
esac
DoTime
+ if [ "$size" -ne 1589 ]
+ then
+ echo "($size)"
+ fi
done
printf "\n"
}
=====================================
xtract.go
=====================================
@@ -297,7 +297,7 @@ Modification
Efetch Normalization
- -normalize [database]
+ -normalize [database]
Validation
@@ -327,6 +327,8 @@ Data Conversion
[-nest flat|recurse|plural|depth]
-a2x Convert ASN.1 stream to XML suitable for -path navigation
+ [-set setWrapper]
+ [-rec recordWrapper]
-t2x Convert tab-delimited table to XML
[-set setWrapper]
@@ -3431,7 +3433,7 @@ func PrintSubtree(node *Node, style IndentType, printAttrs bool, proc func(strin
}
// ProcessClause handles comma-separated -element arguments
-func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, def string, status OpType, index, level int, variables map[string]string, transform map[string]string, histogram map[string]int) (string, bool) {
+func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, def string, wrp bool, status OpType, index, level int, variables map[string]string, transform map[string]string, histogram map[string]int) (string, bool) {
if curr == nil || stages == nil {
return "", false
@@ -3527,6 +3529,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
// handle usual situation with no range first
if norm {
+ if wrp {
+ str = html.EscapeString(str)
+ }
acc(str)
return
}
@@ -3556,6 +3561,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
str = str[:idx]
}
if str != "" {
+ if wrp {
+ str = html.EscapeString(str)
+ }
acc(str)
}
return
@@ -3629,6 +3637,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
if doUpCase {
str = strings.ToUpper(str)
}
+ if wrp {
+ str = html.EscapeString(str)
+ }
acc(str)
} else if max == 0 {
if min > 0 && min < len(str) {
@@ -3640,6 +3651,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
if doUpCase {
str = strings.ToUpper(str)
}
+ if wrp {
+ str = html.EscapeString(str)
+ }
acc(str)
}
}
@@ -3653,6 +3667,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
if doUpCase {
str = strings.ToUpper(str)
}
+ if wrp {
+ str = html.EscapeString(str)
+ }
acc(str)
}
}
@@ -3666,6 +3683,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
if doUpCase {
str = strings.ToUpper(str)
}
+ if wrp {
+ str = html.EscapeString(str)
+ }
acc(str)
}
}
@@ -3711,7 +3731,9 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, plg, sep, d
case ENCODE:
exploreElements(func(str string, lvl int) {
if str != "" {
- str = html.EscapeString(str)
+ if !wrp {
+ str = html.EscapeString(str)
+ }
sendSlice(str)
}
})
@@ -4851,6 +4873,8 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
varname := ""
+ wrp := false
+
plain := true
var currColor *color.Color
@@ -4873,7 +4897,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
switch op.Type {
case ELEMENT, FIRST, LAST, ENCODE, DECODE, PLAIN, UPPER, LOWER, CHAIN, TITLE, ORDER, YEAR, TRANSLATE, TERMS, WORDS, PAIRS, REVERSE, LETTERS, CLAUSES, INDICES, MESHCODE, MATRIX, ACCENTED,
NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, MED, MUL, DIV, MOD, BIN, BIT, ZEROBASED, ONEBASED, UCSCBASED, REVCOMP, NUCLEIC, FASTA, NCBI2NA, NCBI4NA, MOLWT:
- txt, ok := ProcessClause(curr, op.Stages, mask, tab, pfx, sfx, plg, sep, def, op.Type, index, level, variables, transform, histogram)
+ txt, ok := ProcessClause(curr, op.Stages, mask, tab, pfx, sfx, plg, sep, def, wrp, op.Type, index, level, variables, transform, histogram)
if ok {
plg = ""
lst = elg
@@ -4886,7 +4910,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
}
}
case HISTOGRAM:
- txt, ok := ProcessClause(curr, op.Stages, mask, "", "", "", "", "", "", op.Type, index, level, variables, transform, histogram)
+ txt, ok := ProcessClause(curr, op.Stages, mask, "", "", "", "", "", "", wrp, op.Type, index, level, variables, transform, histogram)
if ok {
accum(txt)
}
@@ -4937,6 +4961,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
sfx = ""
plg = ""
elg = ""
+ wrp = false
break
}
lft, rgt := SplitInTwoAt(str, ",", RIGHT)
@@ -4949,6 +4974,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
sfx = "</" + rgt + ">"
sep = "</" + rgt + "><" + rgt + ">"
}
+ wrp = true
case RST:
pfx = ""
sfx = ""
@@ -4956,6 +4982,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
elg = ""
sep = "\t"
def = ""
+ wrp = false
case DEF:
def = str
case COLOR:
@@ -4999,7 +5026,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin
// -if "&VARIABLE" will fail if initialized with empty string ""
delete(variables, varname)
} else {
- txt, ok := ProcessClause(curr, op.Stages, mask, "", pfx, sfx, plg, sep, def, op.Type, index, level, variables, transform, histogram)
+ txt, ok := ProcessClause(curr, op.Stages, mask, "", pfx, sfx, plg, sep, def, wrp, op.Type, index, level, variables, transform, histogram)
if ok {
plg = ""
lst = elg
@@ -8952,6 +8979,8 @@ func JSONConverter(inp <-chan string, set, rec, nest string) <-chan string {
buffer.WriteString(indentSpaces[i])
}
+ count := 0
+
// recursive function definitions
var parseObject func(tag string)
var parseArray func(tag, pfx string, lvl int)
@@ -9011,6 +9040,17 @@ func JSONConverter(inp <-chan string, set, rec, nest string) <-chan string {
buffer.WriteString(tag)
buffer.WriteString(">\n")
}
+
+ count++
+ if count > 1000 {
+ count = 0
+ txt := buffer.String()
+ if txt != "" {
+ // send current result through output channel
+ out <- txt
+ }
+ buffer.Reset()
+ }
}
parseObject = func(tag string) {
@@ -9084,9 +9124,10 @@ func JSONConverter(inp <-chan string, set, rec, nest string) <-chan string {
}
txt := buffer.String()
-
- // send result through output channel
- out <- txt
+ if txt != "" {
+ // send remaining result through output channel
+ out <- txt
+ }
buffer.Reset()
@@ -9401,7 +9442,7 @@ func ASN1Tokenizer(inp io.Reader) <-chan string {
}
// ASN1Converter parses ASN1 token stream into XML object stream
-func ASN1Converter(inp <-chan string) <-chan string {
+func ASN1Converter(inp <-chan string, set, rec string) <-chan string {
if inp == nil {
return nil
@@ -9492,7 +9533,6 @@ func ASN1Converter(inp <-chan string) <-chan string {
count := 0
indent := 0
- set := ""
if set != "" {
indent = 1
}
@@ -9564,9 +9604,9 @@ func ASN1Converter(inp <-chan string) <-chan string {
}
// recursive function definition
- var parseAsnObject func(prnt string)
+ var parseAsnObject func(prnt string, lvl int)
- parseAsnObject = func(prnt string) {
+ parseAsnObject = func(prnt string, lvl int) {
for {
tkn := nextToken()
@@ -9594,7 +9634,7 @@ func ASN1Converter(inp <-chan string) <-chan string {
printOpeningTag(trd)
tag = trd
}
- parseAsnObject(tag)
+ parseAsnObject(tag, lvl+1)
if trd != "" {
printClosingTag(trd)
}
@@ -9602,6 +9642,9 @@ func ASN1Converter(inp <-chan string) <-chan string {
printClosingTag(sec)
}
printClosingTag(fst)
+ if lvl == 0 {
+ return
+ }
case ",":
fst, sec, trd := popFromArry()
if trd != "" {
@@ -9629,6 +9672,8 @@ func ASN1Converter(inp <-chan string) <-chan string {
}
return
case "::=":
+ fmt.Fprintf(os.Stderr, "\nERROR: Unexpected ::= token found\n")
+ os.Exit(1)
default:
arry = append(arry, tkn)
}
@@ -9646,6 +9691,10 @@ func ASN1Converter(inp <-chan string) <-chan string {
}
}
+ if set != "" {
+ out <- "<" + set + ">"
+ }
+
// process stream of catenated top-level ASN1 records
for {
arry = nil
@@ -9655,6 +9704,10 @@ func ASN1Converter(inp <-chan string) <-chan string {
break
}
+ if rec != "" {
+ top = rec
+ }
+
arry = append(arry, top)
tkn := nextToken()
@@ -9667,17 +9720,22 @@ func ASN1Converter(inp <-chan string) <-chan string {
os.Exit(1)
}
- parseAsnObject(top)
+ parseAsnObject(top, 0)
txt := buffer.String()
-
- // send remaining result through output channel
- out <- txt
+ if txt != "" {
+ // send remaining result through output channel
+ out <- txt
+ }
buffer.Reset()
runtime.Gosched()
}
+
+ if set != "" {
+ out <- "</" + set + ">"
+ }
}
// launch single converter goroutine
@@ -10219,7 +10277,8 @@ func GenBankConverter(inp io.Reader) <-chan string {
rec.WriteString(" <INSDReference>\n")
- str := strings.TrimPrefix(line, "REFERENCE")
+ txt := strings.TrimPrefix(line, "REFERENCE")
+ str := readContinuationLines(txt)
str = CompressRunsOfSpaces(str)
str = strings.TrimSpace(str)
idx := strings.Index(str, "(")
@@ -10236,13 +10295,22 @@ func GenBankConverter(inp io.Reader) <-chan string {
writeOneElement(" ", "INSDReference_position", posn)
} else {
- cols := strings.Fields(posn)
- if len(cols) == 4 && cols[2] == "to" {
-
- writeOneElement(" ", "INSDReference_position", cols[1]+".."+cols[3])
-
+ var arry []string
+ cls := strings.Split(posn, ";")
+ for _, item := range cls {
+ item = strings.TrimPrefix(item, "bases ")
+ item = strings.TrimPrefix(item, "residues ")
+ item = strings.TrimSpace(item)
+ cols := strings.Fields(item)
+ if len(cols) == 3 && cols[1] == "to" {
+ arry = append(arry, cols[0]+".."+cols[2])
+ }
+ }
+ if len(arry) > 0 {
+ posit := strings.Join(arry, ",")
+ writeOneElement(" ", "INSDReference_position", posit)
} else {
- fmt.Fprintf(os.Stderr, "ERROR: "+line+"\n")
+ fmt.Fprintf(os.Stderr, "ERROR: "+posn+"\n")
}
}
} else {
@@ -10250,7 +10318,6 @@ func GenBankConverter(inp io.Reader) <-chan string {
writeOneElement(" ", "INSDReference_reference", ref)
}
- line = nextLine()
row++
if strings.HasPrefix(line, " AUTHORS") {
@@ -11295,7 +11362,7 @@ func main() {
debug.FreeOSMemory()
if timr {
- printDuration("records")
+ printDuration("blocks")
}
return
@@ -11303,13 +11370,51 @@ func main() {
// ASN.1 TO XML CONVERTER
- if args[0] == "-a2x" || args[0] == "-asn2xml" || args[0] == "-asn2tokens" {
+ if args[0] == "-a2x" || args[0] == "-asn2xml" {
+
+ set := ""
+ rec := ""
+
+ nextArg := func() (string, bool) {
+
+ if len(args) < 1 {
+ return "", false
+ }
+
+ // remove next token from slice
+ nxt := args[0]
+ args = args[1:]
+
+ return nxt, true
+ }
+
+ // look for optional arguments
+ args = args[1:]
+ for {
+ arg, ok := nextArg()
+ if !ok {
+ break
+ }
+
+ switch arg {
+ case "-set":
+ // override set wrapper
+ set, ok = nextArg()
+ if ok && set == "-" {
+ set = ""
+ }
+ case "-rec":
+ // override record wrapper
+ rec, ok = nextArg()
+ if ok && rec == "-" {
+ rec = ""
+ }
+ }
+ }
atkn := ASN1Tokenizer(in)
acnv := atkn
- if args[0] != "-asn2tokens" {
- acnv = ASN1Converter(atkn)
- }
+ acnv = ASN1Converter(atkn, set, rec)
if atkn == nil || acnv == nil {
fmt.Fprintf(os.Stderr, "\nERROR: Unable to create ASN.1 to XML converter\n")
@@ -11336,7 +11441,7 @@ func main() {
debug.FreeOSMemory()
if timr {
- printDuration("lines")
+ printDuration("blocks")
}
return
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/compare/1a0a45b33d972a7e14864aeabc2f6ad7ec43122c...f2e309255fa5041d1709a57aa0230c54a066fe51
--
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/compare/1a0a45b33d972a7e14864aeabc2f6ad7ec43122c...f2e309255fa5041d1709a57aa0230c54a066fe51
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201020/da0b232d/attachment-0001.html>
More information about the debian-med-commit mailing list