[med-svn] [Git][med-team/ncbi-entrez-direct][master] 5 commits: New upstream version 14.6.20210209+dfsg
Aaron M. Ucko
gitlab at salsa.debian.org
Fri Feb 12 22:45:37 GMT 2021
Aaron M. Ucko pushed to branch master at Debian Med / ncbi-entrez-direct
Commits:
bfa1bdb4 by Aaron M. Ucko at 2021-02-12T12:06:11-05:00
New upstream version 14.6.20210209+dfsg
- - - - -
c8043a71 by Aaron M. Ucko at 2021-02-12T12:34:48-05:00
Merge tag 'upstream/14.6.20210209+dfsg'
Upstream version 14.6.20210209(+dfsg).
- - - - -
6e85df44 by Aaron M. Ucko at 2021-02-12T12:35:10-05:00
debian/man/xtract.1: Tune hyphenation under -insd.
- - - - -
30083842 by Aaron M. Ucko at 2021-02-12T12:35:22-05:00
debian/man/*.1: Update for new release (14.6.20210209[+dfsg]).
* transmute.1: New -extract option (Sequence Editing).
* xtract.1: New -insd qualifier type feat_location.
- - - - -
baad3cc2 by Aaron M. Ucko at 2021-02-12T12:35:33-05:00
Finalize ncbi-entrez-direct 14.6.20210209+dfsg-1 for unstable.
These changes are small enough for the soft freeze, and could have
beaten the freeze if I hadn't waited for the previous upload to migrate.
(golang-1.15 formally blocked it.)
- - - - -
5 changed files:
- debian/changelog
- debian/man/transmute.1
- debian/man/xtract.1
- transmute.go
- xtract.go
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,11 @@
+ncbi-entrez-direct (14.6.20210209+dfsg-1) unstable; urgency=medium
+
+ * New upstream release.
+ * debian/man/{transmute,xtract}.1: Update for new release.
+ * debian/man/xtract.1: Tune hyphenation under -insd.
+
+ -- Aaron M. Ucko <ucko at debian.org> Fri, 12 Feb 2021 12:31:49 -0500
+
ncbi-entrez-direct (14.6.20210203+dfsg-2) unstable; urgency=medium
* Standards-Version: 4.5.1 (routine-update)
=====================================
debian/man/transmute.1
=====================================
@@ -1,4 +1,4 @@
-.TH TRANSMUTE 1 2021-02-06 NCBI "NCBI Entrez Direct User's Manual"
+.TH TRANSMUTE 1 2021-02-12 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
align\-columns, gbf2xml, transmute \- transform (NCBI Entrez Direct) data
.SH SYNOPSIS
@@ -45,6 +45,8 @@ align\-columns, gbf2xml, transmute \- transform (NCBI Entrez Direct) data
[\|\fB\-delete\fP\ \fIN\fP\|]
[\|\fB\-insert\fP\ \fIseq\fP\|]
+\fBtransmute\fP \fB\-extract\fP\ \fIfeat_loc\fP
+
\fBtransmute\fP \fB\-cds2prot\fP
[\|\fB\-code\fP\ \fIN\fP\|]
[\|\fB\-frame\fP\ \fIN\fP\|]
@@ -237,6 +239,9 @@ Delete \fIN\fP bases or residues.
Insert given sequence.
.RE
.PD
+.TP 10
+\fB\-extract\fP\ \fIfeat_loc\fP
+Use \fBxtract \-insd\fP ... \fBfeat_location\fP instructions.
.SS Sequence Processing
.TP 10
\fB\-cds2prot\fP
=====================================
debian/man/xtract.1
=====================================
@@ -1,4 +1,4 @@
-.TH XTRACT 1 2021-02-06 NCBI "NCBI Entrez Direct User's Manual"
+.TH XTRACT 1 2021-02-12 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
xtract \- NCBI Entrez Direct XML conversion and transformation tool
.SH SYNOPSIS
@@ -541,18 +541,20 @@ Print them if invoked standalone;
run them if invoked as part of a pipeline.
Requires one or more arguments,
which may appear in the following order:
+.nh
.RS
.\".PD 0
.IP Descriptor(s) 15
-.BR INSDSeq_sequence / INSDSeq_definition / INSDSeq_division "/... [\|...\|]"
+.BR INSDSeq_sequence / INSDSeq_definition /\: INSDSeq_division "/... [\|...\|]"
.IP Completeness 15
.BR complete / partial
.IP Feature(s) 15
.BR CDS / mRNA /...[\| , ...\|]
.IP Qualifier(s)
-.BR INSDFeature_key / \(dq#INSDInterval\(dq / gene / product / sub_sequence "/... [\|...\|]"
+.BR INSDFeature_key / \(dq#INSDInterval\(dq / gene / product /\: feat_location / sub_sequence "/... [\|...\|]"
.\".PD
.RE
+.hy 1
.SS Frequency Table
.TP
\fB\-histogram\fP
=====================================
transmute.go
=====================================
@@ -155,6 +155,8 @@ Sequence Editing
-delete Delete N bases
-insert Insert given sequence
+ -extract Use xtract -insd feat_location instructions
+
Sequence Processing
-cds2prot Translate coding region into protein
@@ -274,14 +276,14 @@ Mitochondrial Mistranslation
efetch -db nuccore -id NC_012920 -format gb |
transmute -g2x |
- xtract -insd CDS gene product translation sub_sequence |
- while IFS=$'\t' read acc gene prod prot seq
+ xtract -insd CDS gene product protein_id translation sub_sequence |
+ while IFS=$'\t' read acc gene prod prid prot seq
do
mito=$( echo "$seq" | transmute -cds2prot -code 2 -stop )
norm=$( echo "$seq" | transmute -cds2prot -code 1 -stop )
if [ "$mito" != "$norm" ]
then
- echo ">$acc $gene $prod"
+ echo ">$acc $gene $prid $prod"
transmute -diff <( echo "$mito" ) <( echo "$norm" )
echo ""
fi
@@ -4639,8 +4641,8 @@ func SequenceRemove(inp io.Reader, args []string) {
return
}
- first := 0
- last := 0
+ first := ""
+ last := ""
// skip past command name
args = args[1:]
@@ -4649,10 +4651,12 @@ func SequenceRemove(inp io.Reader, args []string) {
switch args[0] {
case "-first":
- first = GetNumericArg(args, "Bases to delete at beginning", 0, -1, -1)
+ first = GetStringArg(args, "Bases to delete at beginning")
+ first = strings.ToUpper(first)
args = args[2:]
case "-last":
- last = GetNumericArg(args, "Bases to delete at end", 0, -1, -1)
+ last = GetStringArg(args, "Bases to delete at end")
+ last = strings.ToUpper(last)
args = args[2:]
default:
fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized option after -remove command\n")
@@ -4663,21 +4667,63 @@ func SequenceRemove(inp io.Reader, args []string) {
str := ReadAllIntoSequence(inp)
ln := len(str)
- if first > 0 {
- if first <= ln {
- str = str[first:]
- ln = len(str)
- } else {
- fmt.Fprintf(os.Stderr, "\nERROR: -first argument %d is greater than sequence length %d\n", first, ln)
- str = ""
+
+ if IsAllDigits(first) {
+ val, err := strconv.Atoi(first)
+ if err == nil && val > 0 {
+ if val <= ln {
+ str = str[val:]
+ ln = len(str)
+ } else {
+ fmt.Fprintf(os.Stderr, "\nERROR: -first argument %d is greater than sequence length %d\n", val, ln)
+ str = ""
+ }
+ }
+ } else {
+ val := len(first)
+ if val > 0 {
+ if val <= ln {
+ // warn if existing sequence does not match deletion argument
+ ext := str[:val]
+ if first != ext {
+ fmt.Fprintf(os.Stderr, "\nWARNING: -first argument %s does not match existing sequence %s\n", first, ext)
+ }
+ // delete characters
+ str = str[val:]
+ ln = len(str)
+ } else {
+ fmt.Fprintf(os.Stderr, "\nERROR: -first argument %d is greater than sequence length %d\n", val, ln)
+ str = ""
+ }
}
}
- if last > 0 {
- if last <= ln {
- str = str[:ln-last]
- } else {
- fmt.Fprintf(os.Stderr, "\nERROR: -last argument %d is greater than sequence length %d\n", last, ln)
- str = ""
+
+ if IsAllDigits(last) {
+ val, err := strconv.Atoi(last)
+ if err == nil && val > 0 {
+ if val <= ln {
+ str = str[:ln-val]
+ } else {
+ fmt.Fprintf(os.Stderr, "\nERROR: -last argument %d is greater than remaining sequence length %d\n", val, ln)
+ str = ""
+ }
+ }
+ } else {
+ val := len(last)
+ if val > 0 {
+ if val <= ln {
+ // warn if existing sequence does not match deletion argument
+ ext := str[ln-val:]
+ if last != ext {
+ fmt.Fprintf(os.Stderr, "\nWARNING: -last argument %s does not match existing sequence %s\n", last, ext)
+ }
+ // delete characters
+ str = str[:ln-val]
+ ln = len(str)
+ } else {
+ fmt.Fprintf(os.Stderr, "\nERROR: -last argument %d is greater than sequence length %d\n", val, ln)
+ str = ""
+ }
}
}
@@ -4846,6 +4892,77 @@ func SequenceReplace(inp io.Reader, args []string) {
}
}
+func SequenceExtract(inp io.Reader, args []string) {
+
+ if inp == nil {
+ return
+ }
+
+ // skip past command name
+ args = args[1:]
+
+ if len(args) < 1 {
+ fmt.Fprintf(os.Stderr, "\nERROR: Missing argument after -extract command\n")
+ os.Exit(1)
+ }
+
+ // read output of xtract -insd feat_location qualifier
+ feat_loc := args[0]
+
+ str := ReadAllIntoSequence(inp)
+
+ ln := len(str)
+
+ // split intervals, e.g., "201..224,1550..1920,1986..2085,2317..2404,2466..2629"
+ comma := strings.Split(feat_loc, ",")
+
+ for _, item := range comma {
+
+ // also allow dash separator, e.g., "201-224,1550-1920,1986-2085,2317-2404,2466-2629"
+ item = strings.Replace(item, "-", "..", -1)
+
+ fr, to := SplitInTwoAt(item, "..", LEFT)
+
+ fr = strings.TrimSpace(fr)
+ to = strings.TrimSpace(to)
+
+ min, err := strconv.Atoi(fr)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized number '%s'\n", fr)
+ os.Exit(1)
+ }
+ if min < 1 || min > ln {
+ fmt.Fprintf(os.Stderr, "\nERROR: Starting point '%s' out of range\n", fr)
+ os.Exit(1)
+ }
+
+ max, err := strconv.Atoi(to)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized number '%s'\n", to)
+ os.Exit(1)
+ }
+ if max < 1 || max > ln {
+ fmt.Fprintf(os.Stderr, "\nERROR: Ending point '%s' out of range\n", to)
+ os.Exit(1)
+ }
+
+ if min < max {
+ min--
+ sub := str[min:max]
+ os.Stdout.WriteString(sub)
+ } else if min > max {
+ max--
+ sub := str[max:min]
+ sub = ReverseComplement(sub)
+ os.Stdout.WriteString(sub)
+ } else {
+ // need more information to know strand if single point
+ }
+ }
+
+ os.Stdout.WriteString("\n")
+}
+
// REVERSE SEQUENCE
// SeqFlip reverses without complementing - e.g., minus strand proteins translated in reverse order
@@ -4958,7 +5075,7 @@ func PrintFastaPairs(frst, scnd string) {
// print in blocks of 50 bases or residues
for i := 0; i < mx; i += 50 {
dl := 50
- if mx -i < 50 {
+ if mx-i < 50 {
dl = mx - i
}
lf := fs[:dl]
@@ -8344,6 +8461,8 @@ func main() {
SequenceRetain(in, args)
case "-replace":
SequenceReplace(in, args)
+ case "-extract":
+ SequenceExtract(in, args)
case "-revcomp":
NucRevComp(in)
case "-reverse":
=====================================
xtract.go
=====================================
@@ -276,7 +276,7 @@ Command Generator
Descriptors INSDSeq_sequence INSDSeq_definition INSDSeq_division
Flags [complete|partial]
Feature(s) CDS,mRNA
- Qualifiers INSDFeature_key "#INSDInterval" gene product sub_sequence
+ Qualifiers INSDFeature_key "#INSDInterval" gene product feat_location sub_sequence
Variation Processing
@@ -6340,6 +6340,66 @@ func ProcessINSD(args []string, isPipe, addDash, doIndex bool) []string {
acc = append(acc, "-deq", "\"\\t\"")
}
+ } else if str == "feat_location" {
+
+ // special feat_location qualifier shows feature intervals
+ acc = append(acc, "-block", "INSDFeature_intervals")
+
+ acc = append(acc, "-subset", "INSDInterval", "-FR", "INSDInterval_from", "-TO", "INSDInterval_to")
+ if isPipe {
+ acc = append(acc, "-pfx", "", "-tab", "..", "-element", "&FR")
+ acc = append(acc, "-pfx", "", "-tab", ",", "-element", "&TO")
+ } else {
+ acc = append(acc, "-pfx", "\"\"", "-tab", "\"..\"", "-element", "\"&FR\"")
+ acc = append(acc, "-pfx", "\"\"", "-tab", "\",\"", "-element", "\"&TO\"")
+ }
+
+ acc = append(acc, "-subset", "INSDFeature_intervals")
+ if isPipe {
+ acc = append(acc, "-deq", "\\t")
+ } else {
+ acc = append(acc, "-deq", "\"\\t\"")
+ }
+
+
+ } else if str == "chloroplast" ||
+ str == "chromoplast" ||
+ str == "cyanelle" ||
+ str == "environmental_sample" ||
+ str == "focus" ||
+ str == "germline" ||
+ str == "kinetoplast" ||
+ str == "macronuclear" ||
+ str == "metagenomic" ||
+ str == "mitochondrion" ||
+ str == "partial" ||
+ str == "proviral" ||
+ str == "pseudo" ||
+ str == "rearranged" ||
+ str == "ribosomal_slippage" ||
+ str == "trans_splicing" ||
+ str == "transgenic" ||
+ str == "virion" {
+
+ acc = append(acc, "-block", "INSDQualifier")
+
+ checkAgainstVocabulary(str, "qualifier", qualifiers)
+ if doIndex {
+ acc = append(acc, "-if", "INSDQualifier_name", "-equals", str)
+ acc = append(acc, "-clr", "-indices", "INSDQualifier_name")
+ } else {
+ acc = append(acc, "-if", "INSDQualifier_name", "-equals", str)
+ acc = append(acc, "-lbl", str)
+ }
+ if addDash {
+ acc = append(acc, "-block", "INSDFeature", "-unless", "INSDQualifier_name", "-equals", str)
+ if isPipe {
+ acc = append(acc, "-lbl", "\\-")
+ } else {
+ acc = append(acc, "-lbl", "\"\\-\"")
+ }
+ }
+
} else {
acc = append(acc, "-block", "INSDQualifier")
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/compare/fe1e6da39bf57ea0e3c70fcb07425e77995ca363...baad3cc2c8cb9ac3914b3f86b251d204ec06c551
--
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/compare/fe1e6da39bf57ea0e3c70fcb07425e77995ca363...baad3cc2c8cb9ac3914b3f86b251d204ec06c551
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210212/91c5eb67/attachment-0001.html>
More information about the debian-med-commit mailing list