[med-svn] [Git][med-team/ncbi-entrez-direct][master] 4 commits: New upstream version 23.8.20250410+dfsg

Aaron M. Ucko (@ucko) gitlab at salsa.debian.org
Fri Apr 11 03:02:46 BST 2025



Aaron M. Ucko pushed to branch master at Debian Med / ncbi-entrez-direct


Commits:
fdc0704d by Aaron M. Ucko at 2025-04-10T21:56:53-04:00
New upstream version 23.8.20250410+dfsg
- - - - -
0dc5ca67 by Aaron M. Ucko at 2025-04-10T21:58:06-04:00
Merge tag 'upstream/23.8.20250410+dfsg'

Upstream version 23.8.20250410(+dfsg).

- - - - -
993b8e01 by Aaron M. Ucko at 2025-04-10T21:59:16-04:00
debian/NEWS: Cite a non-elided version.

- - - - -
0d758c3b by Aaron M. Ucko at 2025-04-10T22:01:13-04:00
Finalize ncbi-entrez-direct 23.8.20250410+dfsg-1, back to unstable.

- - - - -


4 changed files:

- debian/NEWS
- debian/changelog
- eutils/table.go
- gff-sort


Changes:

=====================================
debian/NEWS
=====================================
@@ -1,4 +1,4 @@
-ncbi-entrez-direct (23.7.20250401+dfsg-1) experimental; urgency=medium
+ncbi-entrez-direct (23.8.20250408+dfsg-1) experimental; urgency=medium
 
   I have, at least for now, abandoned the manpages as too much of a
   maintenance burden and disabled their installation to avoid


=====================================
debian/changelog
=====================================
@@ -1,3 +1,11 @@
+ncbi-entrez-direct (23.8.20250410+dfsg-1) unstable; urgency=medium
+
+  * New upstream release.
+  * Upload to unstable.
+  * debian/NEWS: Cite a non-elided version.
+
+ -- Aaron M. Ucko <ucko at debian.org>  Thu, 10 Apr 2025 22:01:12 -0400
+
 ncbi-entrez-direct (23.8.20250408+dfsg-1) experimental; urgency=medium
 
   * New upstream release.


=====================================
eutils/table.go
=====================================
@@ -36,6 +36,7 @@ import (
 	"io"
 	"os"
 	"runtime"
+	"strconv"
 	"strings"
 )
 
@@ -282,12 +283,15 @@ func ParentsToLineage(inp io.Reader) <-chan string {
 
 		okay := false
 		row := 0
+		hasFourColumns := false
 
 		// maximum depth to prevent stack overflow if circular references are present
 		const maxDepth = 1000
 
 		identToParent := make(map[string]string)
 		identToLineage := make(map[string]string)
+		identToLabel := make(map[string]string)
+		identToSort := make(map[string]int)
 
 		// getLineage recursive definition
 		var getLineage func(id string, depth int) (string, bool)
@@ -331,12 +335,16 @@ func ParentsToLineage(inp io.Reader) <-chan string {
 			// store newly-calculated lineage, to be used subsequently by its children
 			identToLineage[id] = lin
 
+			if hasFourColumns && identToSort[id] < identToSort[pt] {
+				DisplayError("[%s] %s should not be a child of [%s] %s", identToLabel[id], id, identToLabel[pt], pt)
+			}
+
 			return lin, true
 		}
 
 		scanr := bufio.NewScanner(inp)
 
-		// read identifier and its immediate parent
+		// read identifier, its immediate parent, and optionally the feature type (e.g., gene) and the sort key (e.g., 1)
 		for scanr.Scan() {
 
 			line := scanr.Text()
@@ -344,14 +352,22 @@ func ParentsToLineage(inp io.Reader) <-chan string {
 			row++
 
 			cols := strings.Split(line, "\t")
+			ncols := len(cols)
 
-			if len(cols) != 2 {
+			if ncols != 2 && ncols != 4 {
 				DisplayError("Row %d should not have %d columns", row, len(cols))
 				continue
 			}
 
 			id := cols[0]
 			prnt := cols[1]
+			lbl := ""
+			srt := ""
+			if ncols == 4 {
+				hasFourColumns = true
+				lbl = cols[2]
+				srt = cols[3]
+			}
 
 			vl, ok := identToParent[id]
 			if ok {
@@ -364,6 +380,31 @@ func ParentsToLineage(inp io.Reader) <-chan string {
 			if prnt != "" {
 				identToParent[id] = prnt
 			}
+
+			if lbl != "" {
+				vl, ok = identToLabel[id]
+				if ok {
+					if vl != lbl {
+						DisplayWarning("Conflicting label in row %d - '%s' went from '%s' to '%s'", row, id, vl, lbl)
+					}
+				} else {
+					identToLabel[id] = lbl
+				}
+			}
+
+			if srt != "" {
+				val, err := strconv.Atoi(srt)
+				if err == nil {
+					nm, oky := identToSort[id]
+					if oky {
+						if nm != val {
+							DisplayWarning("Conflicting sort key in row %d -  '%s' went from '%d' to '%d'", row, id, nm, val)
+						}
+					} else {
+						identToSort[id] = val
+					}
+				}
+			}
 		}
 
 		// compute full lineage for each identifier


=====================================
gff-sort
=====================================
@@ -5,19 +5,40 @@
 
 # gff-sort
 
+# HERE document for mapping feature keys to sort order (other keys ending with RNA separately mapped to 2)
+IFS='' read -r -d '' TYPEMAP <<'EOF'
+gene	1
+pseudogene	1
+mRNA	2
+primary_transcript	2
+C_region	2
+D_segment	2
+J_segment	2
+N_region	2
+S_region	2
+V_region	2
+V_segment	2
+CDS	3
+exon	4
+intron	5
+EOF
+
 temp1=$(mktemp /tmp/GFF_TEMP1.XXXXXXXXX)
 temp2=$(mktemp /tmp/GFF_TEMP2.XXXXXXXXX)
-temp3=$(mktemp /tmp/GFF_TEMP2.XXXXXXXXX)
 
 grep '.' |
 sed '/^#/d' |
+# read GFF3 tab-delimited data into XML structure
 tbl2xml -rec Rec SeqID Source Type Start End Score Strand Phase Attributes |
-xtract -transform <( echo -e "gene\t1\nmRNA\t2\nCDS\t3\nexon\t4\nintron\t5\n" ) -rec Rec \
+# use xtract -with and -split arguments to separate individual tag=value attributes
+xtract -transform <( echo -e "$TYPEMAP" ) -rec Rec \
   -pattern Rec \
     -group Rec -pkg Fields \
       -block "Rec/*" -element "*" \
-      -block Type -def 6 -wrp Feat -translate Type \
+      -block Type -if Type -ends-with RNA -wrp Feat -lbl 2 \
+        -else -def 6 -wrp Feat -translate Type \
     -group Rec -pkg Content -wrp Item -with ";" -split Attributes |
+# use xtract prefix and suffix trimming constructs to isolate tag and value
 xtract -rec Rec \
   -pattern Rec \
     -group Fields -element "*" \
@@ -26,19 +47,18 @@ xtract -rec Rec \
 transmute -mixed -format > $temp1
 
 cat "$temp1" |
-xtract -pattern Rec -group Content -if ID -def "-" -element ID Parent > $temp2
-
-cat "$temp2" |
-transmute -p2l > $temp3
+# generate table with identifier, parent, feature key, and sort order columns
+xtract -pattern Rec -if Content/ID -def "-" -element ID Parent Type Feat |
+# convert to table with identifier and calculated lineage columns
+transmute -p2l > $temp2
 
 cat "$temp1" |
-xtract -transform "$temp3" \
+xtract -transform "$temp2" \
   -pattern Rec \
-    -group "Fields/*" -def "-" -element "~" \
+    -group "Fields/*" -element "~" \
     -group Content -def "-" -translate ID |
 sort-table -k 1,1Vf -k 11,11f -k 7,7f -k 4,4n -k 5,5nr -k 10,10n |
 cut -f 1-9
 
-rm "$temp3"
 rm "$temp2"
 rm "$temp1"



View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/compare/b2bd3bdec504f146bbee134bc8da276511e25265...0d758c3becbd9b83a3f205180241638b93f3d073

-- 
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/compare/b2bd3bdec504f146bbee134bc8da276511e25265...0d758c3becbd9b83a3f205180241638b93f3d073
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250411/146641ff/attachment-0001.htm>


More information about the debian-med-commit mailing list