[med-svn] [Git][med-team/ncbi-entrez-direct][upstream] New upstream version 14.6.20210209+dfsg
Aaron M. Ucko
gitlab at salsa.debian.org
Fri Feb 12 22:45:46 GMT 2021
Aaron M. Ucko pushed to branch upstream at Debian Med / ncbi-entrez-direct
Commits:
bfa1bdb4 by Aaron M. Ucko at 2021-02-12T12:06:11-05:00
New upstream version 14.6.20210209+dfsg
- - - - -
2 changed files:
- transmute.go
- xtract.go
Changes:
=====================================
transmute.go
=====================================
@@ -155,6 +155,8 @@ Sequence Editing
-delete Delete N bases
-insert Insert given sequence
+ -extract Use xtract -insd feat_location instructions
+
Sequence Processing
-cds2prot Translate coding region into protein
@@ -274,14 +276,14 @@ Mitochondrial Mistranslation
efetch -db nuccore -id NC_012920 -format gb |
transmute -g2x |
- xtract -insd CDS gene product translation sub_sequence |
- while IFS=$'\t' read acc gene prod prot seq
+ xtract -insd CDS gene product protein_id translation sub_sequence |
+ while IFS=$'\t' read acc gene prod prid prot seq
do
mito=$( echo "$seq" | transmute -cds2prot -code 2 -stop )
norm=$( echo "$seq" | transmute -cds2prot -code 1 -stop )
if [ "$mito" != "$norm" ]
then
- echo ">$acc $gene $prod"
+ echo ">$acc $gene $prid $prod"
transmute -diff <( echo "$mito" ) <( echo "$norm" )
echo ""
fi
@@ -4639,8 +4641,8 @@ func SequenceRemove(inp io.Reader, args []string) {
return
}
- first := 0
- last := 0
+ first := ""
+ last := ""
// skip past command name
args = args[1:]
@@ -4649,10 +4651,12 @@ func SequenceRemove(inp io.Reader, args []string) {
switch args[0] {
case "-first":
- first = GetNumericArg(args, "Bases to delete at beginning", 0, -1, -1)
+ first = GetStringArg(args, "Bases to delete at beginning")
+ first = strings.ToUpper(first)
args = args[2:]
case "-last":
- last = GetNumericArg(args, "Bases to delete at end", 0, -1, -1)
+ last = GetStringArg(args, "Bases to delete at end")
+ last = strings.ToUpper(last)
args = args[2:]
default:
fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized option after -remove command\n")
@@ -4663,21 +4667,63 @@ func SequenceRemove(inp io.Reader, args []string) {
str := ReadAllIntoSequence(inp)
ln := len(str)
- if first > 0 {
- if first <= ln {
- str = str[first:]
- ln = len(str)
- } else {
- fmt.Fprintf(os.Stderr, "\nERROR: -first argument %d is greater than sequence length %d\n", first, ln)
- str = ""
+
+ if IsAllDigits(first) {
+ val, err := strconv.Atoi(first)
+ if err == nil && val > 0 {
+ if val <= ln {
+ str = str[val:]
+ ln = len(str)
+ } else {
+ fmt.Fprintf(os.Stderr, "\nERROR: -first argument %d is greater than sequence length %d\n", val, ln)
+ str = ""
+ }
+ }
+ } else {
+ val := len(first)
+ if val > 0 {
+ if val <= ln {
+ // warn if existing sequence does not match deletion argument
+ ext := str[:val]
+ if first != ext {
+ fmt.Fprintf(os.Stderr, "\nWARNING: -first argument %s does not match existing sequence %s\n", first, ext)
+ }
+ // delete characters
+ str = str[val:]
+ ln = len(str)
+ } else {
+ fmt.Fprintf(os.Stderr, "\nERROR: -first argument %d is greater than sequence length %d\n", val, ln)
+ str = ""
+ }
}
}
- if last > 0 {
- if last <= ln {
- str = str[:ln-last]
- } else {
- fmt.Fprintf(os.Stderr, "\nERROR: -last argument %d is greater than sequence length %d\n", last, ln)
- str = ""
+
+ if IsAllDigits(last) {
+ val, err := strconv.Atoi(last)
+ if err == nil && val > 0 {
+ if val <= ln {
+ str = str[:ln-val]
+ } else {
+ fmt.Fprintf(os.Stderr, "\nERROR: -last argument %d is greater than remaining sequence length %d\n", val, ln)
+ str = ""
+ }
+ }
+ } else {
+ val := len(last)
+ if val > 0 {
+ if val <= ln {
+ // warn if existing sequence does not match deletion argument
+ ext := str[ln-val:]
+ if last != ext {
+ fmt.Fprintf(os.Stderr, "\nWARNING: -last argument %s does not match existing sequence %s\n", last, ext)
+ }
+ // delete characters
+ str = str[:ln-val]
+ ln = len(str)
+ } else {
+ fmt.Fprintf(os.Stderr, "\nERROR: -last argument %d is greater than sequence length %d\n", val, ln)
+ str = ""
+ }
}
}
@@ -4846,6 +4892,77 @@ func SequenceReplace(inp io.Reader, args []string) {
}
}
+func SequenceExtract(inp io.Reader, args []string) {
+
+ if inp == nil {
+ return
+ }
+
+ // skip past command name
+ args = args[1:]
+
+ if len(args) < 1 {
+ fmt.Fprintf(os.Stderr, "\nERROR: Missing argument after -extract command\n")
+ os.Exit(1)
+ }
+
+ // read output of xtract -insd feat_location qualifier
+ feat_loc := args[0]
+
+ str := ReadAllIntoSequence(inp)
+
+ ln := len(str)
+
+ // split intervals, e.g., "201..224,1550..1920,1986..2085,2317..2404,2466..2629"
+ comma := strings.Split(feat_loc, ",")
+
+ for _, item := range comma {
+
+ // also allow dash separator, e.g., "201-224,1550-1920,1986-2085,2317-2404,2466-2629"
+ item = strings.Replace(item, "-", "..", -1)
+
+ fr, to := SplitInTwoAt(item, "..", LEFT)
+
+ fr = strings.TrimSpace(fr)
+ to = strings.TrimSpace(to)
+
+ min, err := strconv.Atoi(fr)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized number '%s'\n", fr)
+ os.Exit(1)
+ }
+ if min < 1 || min > ln {
+ fmt.Fprintf(os.Stderr, "\nERROR: Starting point '%s' out of range\n", fr)
+ os.Exit(1)
+ }
+
+ max, err := strconv.Atoi(to)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized number '%s'\n", to)
+ os.Exit(1)
+ }
+ if max < 1 || max > ln {
+ fmt.Fprintf(os.Stderr, "\nERROR: Ending point '%s' out of range\n", to)
+ os.Exit(1)
+ }
+
+ if min < max {
+ min--
+ sub := str[min:max]
+ os.Stdout.WriteString(sub)
+ } else if min > max {
+ max--
+ sub := str[max:min]
+ sub = ReverseComplement(sub)
+ os.Stdout.WriteString(sub)
+ } else {
+ // need more information to know strand if single point
+ }
+ }
+
+ os.Stdout.WriteString("\n")
+}
+
// REVERSE SEQUENCE
// SeqFlip reverses without complementing - e.g., minus strand proteins translated in reverse order
@@ -4958,7 +5075,7 @@ func PrintFastaPairs(frst, scnd string) {
// print in blocks of 50 bases or residues
for i := 0; i < mx; i += 50 {
dl := 50
- if mx -i < 50 {
+ if mx-i < 50 {
dl = mx - i
}
lf := fs[:dl]
@@ -8344,6 +8461,8 @@ func main() {
SequenceRetain(in, args)
case "-replace":
SequenceReplace(in, args)
+ case "-extract":
+ SequenceExtract(in, args)
case "-revcomp":
NucRevComp(in)
case "-reverse":
=====================================
xtract.go
=====================================
@@ -276,7 +276,7 @@ Command Generator
Descriptors INSDSeq_sequence INSDSeq_definition INSDSeq_division
Flags [complete|partial]
Feature(s) CDS,mRNA
- Qualifiers INSDFeature_key "#INSDInterval" gene product sub_sequence
+ Qualifiers INSDFeature_key "#INSDInterval" gene product feat_location sub_sequence
Variation Processing
@@ -6340,6 +6340,66 @@ func ProcessINSD(args []string, isPipe, addDash, doIndex bool) []string {
acc = append(acc, "-deq", "\"\\t\"")
}
+ } else if str == "feat_location" {
+
+ // special feat_location qualifier shows feature intervals
+ acc = append(acc, "-block", "INSDFeature_intervals")
+
+ acc = append(acc, "-subset", "INSDInterval", "-FR", "INSDInterval_from", "-TO", "INSDInterval_to")
+ if isPipe {
+ acc = append(acc, "-pfx", "", "-tab", "..", "-element", "&FR")
+ acc = append(acc, "-pfx", "", "-tab", ",", "-element", "&TO")
+ } else {
+ acc = append(acc, "-pfx", "\"\"", "-tab", "\"..\"", "-element", "\"&FR\"")
+ acc = append(acc, "-pfx", "\"\"", "-tab", "\",\"", "-element", "\"&TO\"")
+ }
+
+ acc = append(acc, "-subset", "INSDFeature_intervals")
+ if isPipe {
+ acc = append(acc, "-deq", "\\t")
+ } else {
+ acc = append(acc, "-deq", "\"\\t\"")
+ }
+
+
+ } else if str == "chloroplast" ||
+ str == "chromoplast" ||
+ str == "cyanelle" ||
+ str == "environmental_sample" ||
+ str == "focus" ||
+ str == "germline" ||
+ str == "kinetoplast" ||
+ str == "macronuclear" ||
+ str == "metagenomic" ||
+ str == "mitochondrion" ||
+ str == "partial" ||
+ str == "proviral" ||
+ str == "pseudo" ||
+ str == "rearranged" ||
+ str == "ribosomal_slippage" ||
+ str == "trans_splicing" ||
+ str == "transgenic" ||
+ str == "virion" {
+
+ acc = append(acc, "-block", "INSDQualifier")
+
+ checkAgainstVocabulary(str, "qualifier", qualifiers)
+ if doIndex {
+ acc = append(acc, "-if", "INSDQualifier_name", "-equals", str)
+ acc = append(acc, "-clr", "-indices", "INSDQualifier_name")
+ } else {
+ acc = append(acc, "-if", "INSDQualifier_name", "-equals", str)
+ acc = append(acc, "-lbl", str)
+ }
+ if addDash {
+ acc = append(acc, "-block", "INSDFeature", "-unless", "INSDQualifier_name", "-equals", str)
+ if isPipe {
+ acc = append(acc, "-lbl", "\\-")
+ } else {
+ acc = append(acc, "-lbl", "\"\\-\"")
+ }
+ }
+
} else {
acc = append(acc, "-block", "INSDQualifier")
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/commit/bfa1bdb45ecf116d953c9f8f19a4a7f7685931e6
--
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/commit/bfa1bdb45ecf116d953c9f8f19a4a7f7685931e6
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210212/68f475db/attachment-0001.html>
More information about the debian-med-commit mailing list