[med-svn] [Git][med-team/ncbi-entrez-direct][master] 4 commits: New upstream version 23.8.20250410+dfsg
Aaron M. Ucko (@ucko)
gitlab at salsa.debian.org
Fri Apr 11 03:02:46 BST 2025
Aaron M. Ucko pushed to branch master at Debian Med / ncbi-entrez-direct
Commits:
fdc0704d by Aaron M. Ucko at 2025-04-10T21:56:53-04:00
New upstream version 23.8.20250410+dfsg
- - - - -
0dc5ca67 by Aaron M. Ucko at 2025-04-10T21:58:06-04:00
Merge tag 'upstream/23.8.20250410+dfsg'
Upstream version 23.8.20250410(+dfsg).
- - - - -
993b8e01 by Aaron M. Ucko at 2025-04-10T21:59:16-04:00
debian/NEWS: Cite a non-elided version.
- - - - -
0d758c3b by Aaron M. Ucko at 2025-04-10T22:01:13-04:00
Finalize ncbi-entrez-direct 23.8.20250410+dfsg-1, back to unstable.
- - - - -
4 changed files:
- debian/NEWS
- debian/changelog
- eutils/table.go
- gff-sort
Changes:
=====================================
debian/NEWS
=====================================
@@ -1,4 +1,4 @@
-ncbi-entrez-direct (23.7.20250401+dfsg-1) experimental; urgency=medium
+ncbi-entrez-direct (23.8.20250408+dfsg-1) experimental; urgency=medium
I have, at least for now, abandoned the manpages as too much of a
maintenance burden and disabled their installation to avoid
=====================================
debian/changelog
=====================================
@@ -1,3 +1,11 @@
+ncbi-entrez-direct (23.8.20250410+dfsg-1) unstable; urgency=medium
+
+ * New upstream release.
+ * Upload to unstable.
+ * debian/NEWS: Cite a non-elided version.
+
+ -- Aaron M. Ucko <ucko at debian.org> Thu, 10 Apr 2025 22:01:12 -0400
+
ncbi-entrez-direct (23.8.20250408+dfsg-1) experimental; urgency=medium
* New upstream release.
=====================================
eutils/table.go
=====================================
@@ -36,6 +36,7 @@ import (
"io"
"os"
"runtime"
+ "strconv"
"strings"
)
@@ -282,12 +283,15 @@ func ParentsToLineage(inp io.Reader) <-chan string {
okay := false
row := 0
+ hasFourColumns := false
// maximum depth to prevent stack overflow if circular references are present
const maxDepth = 1000
identToParent := make(map[string]string)
identToLineage := make(map[string]string)
+ identToLabel := make(map[string]string)
+ identToSort := make(map[string]int)
// getLineage recursive definition
var getLineage func(id string, depth int) (string, bool)
@@ -331,12 +335,16 @@ func ParentsToLineage(inp io.Reader) <-chan string {
// store newly-calculated lineage, to be used subsequently by its children
identToLineage[id] = lin
+ if hasFourColumns && identToSort[id] < identToSort[pt] {
+ DisplayError("[%s] %s should not be a child of [%s] %s", identToLabel[id], id, identToLabel[pt], pt)
+ }
+
return lin, true
}
scanr := bufio.NewScanner(inp)
- // read identifier and its immediate parent
+ // read identifier, its immediate parent, and optionally the feature type (e.g., gene) and the sort key (e.g., 1)
for scanr.Scan() {
line := scanr.Text()
@@ -344,14 +352,22 @@ func ParentsToLineage(inp io.Reader) <-chan string {
row++
cols := strings.Split(line, "\t")
+ ncols := len(cols)
- if len(cols) != 2 {
+ if ncols != 2 && ncols != 4 {
DisplayError("Row %d should not have %d columns", row, len(cols))
continue
}
id := cols[0]
prnt := cols[1]
+ lbl := ""
+ srt := ""
+ if ncols == 4 {
+ hasFourColumns = true
+ lbl = cols[2]
+ srt = cols[3]
+ }
vl, ok := identToParent[id]
if ok {
@@ -364,6 +380,31 @@ func ParentsToLineage(inp io.Reader) <-chan string {
if prnt != "" {
identToParent[id] = prnt
}
+
+ if lbl != "" {
+ vl, ok = identToLabel[id]
+ if ok {
+ if vl != lbl {
+ DisplayWarning("Conflicting label in row %d - '%s' went from '%s' to '%s'", row, id, vl, lbl)
+ }
+ } else {
+ identToLabel[id] = lbl
+ }
+ }
+
+ if srt != "" {
+ val, err := strconv.Atoi(srt)
+ if err == nil {
+ nm, oky := identToSort[id]
+ if oky {
+ if nm != val {
+ DisplayWarning("Conflicting sort key in row %d - '%s' went from '%d' to '%d'", row, id, nm, val)
+ }
+ } else {
+ identToSort[id] = val
+ }
+ }
+ }
}
// compute full lineage for each identifier
=====================================
gff-sort
=====================================
@@ -5,19 +5,40 @@
# gff-sort
+# HERE document for mapping feature keys to sort order (other keys ending with RNA separately mapped to 2)
+IFS='' read -r -d '' TYPEMAP <<'EOF'
+gene 1
+pseudogene 1
+mRNA 2
+primary_transcript 2
+C_region 2
+D_segment 2
+J_segment 2
+N_region 2
+S_region 2
+V_region 2
+V_segment 2
+CDS 3
+exon 4
+intron 5
+EOF
+
temp1=$(mktemp /tmp/GFF_TEMP1.XXXXXXXXX)
temp2=$(mktemp /tmp/GFF_TEMP2.XXXXXXXXX)
-temp3=$(mktemp /tmp/GFF_TEMP2.XXXXXXXXX)
grep '.' |
sed '/^#/d' |
+# read GFF3 tab-delimited data into XML structure
tbl2xml -rec Rec SeqID Source Type Start End Score Strand Phase Attributes |
-xtract -transform <( echo -e "gene\t1\nmRNA\t2\nCDS\t3\nexon\t4\nintron\t5\n" ) -rec Rec \
+# use xtract -with and -split arguments to separate individual tag=value attributes
+xtract -transform <( echo -e "$TYPEMAP" ) -rec Rec \
-pattern Rec \
-group Rec -pkg Fields \
-block "Rec/*" -element "*" \
- -block Type -def 6 -wrp Feat -translate Type \
+ -block Type -if Type -ends-with RNA -wrp Feat -lbl 2 \
+ -else -def 6 -wrp Feat -translate Type \
-group Rec -pkg Content -wrp Item -with ";" -split Attributes |
+# use xtract prefix and suffix trimming constructs to isolate tag and value
xtract -rec Rec \
-pattern Rec \
-group Fields -element "*" \
@@ -26,19 +47,18 @@ xtract -rec Rec \
transmute -mixed -format > $temp1
cat "$temp1" |
-xtract -pattern Rec -group Content -if ID -def "-" -element ID Parent > $temp2
-
-cat "$temp2" |
-transmute -p2l > $temp3
+# generate table with identifier, parent, feature key, and sort order columns
+xtract -pattern Rec -if Content/ID -def "-" -element ID Parent Type Feat |
+# convert to table with identifier and calculated lineage columns
+transmute -p2l > $temp2
cat "$temp1" |
-xtract -transform "$temp3" \
+xtract -transform "$temp2" \
-pattern Rec \
- -group "Fields/*" -def "-" -element "~" \
+ -group "Fields/*" -element "~" \
-group Content -def "-" -translate ID |
sort-table -k 1,1Vf -k 11,11f -k 7,7f -k 4,4n -k 5,5nr -k 10,10n |
cut -f 1-9
-rm "$temp3"
rm "$temp2"
rm "$temp1"
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/compare/b2bd3bdec504f146bbee134bc8da276511e25265...0d758c3becbd9b83a3f205180241638b93f3d073
--
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/compare/b2bd3bdec504f146bbee134bc8da276511e25265...0d758c3becbd9b83a3f205180241638b93f3d073
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250411/146641ff/attachment-0001.htm>
More information about the debian-med-commit mailing list