[med-svn] [Git][med-team/ncbi-entrez-direct][upstream] New upstream version 14.6.20210209+dfsg

Aaron M. Ucko gitlab at salsa.debian.org
Fri Feb 12 22:45:46 GMT 2021



Aaron M. Ucko pushed to branch upstream at Debian Med / ncbi-entrez-direct


Commits:
bfa1bdb4 by Aaron M. Ucko at 2021-02-12T12:06:11-05:00
New upstream version 14.6.20210209+dfsg
- - - - -


2 changed files:

- transmute.go
- xtract.go


Changes:

=====================================
transmute.go
=====================================
@@ -155,6 +155,8 @@ Sequence Editing
     -delete      Delete N bases
     -insert      Insert given sequence
 
+  -extract     Use xtract -insd feat_location instructions
+
 Sequence Processing
 
   -cds2prot    Translate coding region into protein
@@ -274,14 +276,14 @@ Mitochondrial Mistranslation
 
   efetch -db nuccore -id NC_012920 -format gb |
   transmute -g2x |
-  xtract -insd CDS gene product translation sub_sequence |
-  while IFS=$'\t' read acc gene prod prot seq
+  xtract -insd CDS gene product protein_id translation sub_sequence |
+  while IFS=$'\t' read acc gene prod prid prot seq
   do
     mito=$( echo "$seq" | transmute -cds2prot -code 2 -stop )
     norm=$( echo "$seq" | transmute -cds2prot -code 1 -stop )
     if [ "$mito" != "$norm" ]
     then
-      echo ">$acc $gene $prod"
+      echo ">$acc $gene $prid $prod"
       transmute -diff <( echo "$mito" ) <( echo "$norm" )
       echo ""
     fi
@@ -4639,8 +4641,8 @@ func SequenceRemove(inp io.Reader, args []string) {
 		return
 	}
 
-	first := 0
-	last := 0
+	first := ""
+	last := ""
 
 	// skip past command name
 	args = args[1:]
@@ -4649,10 +4651,12 @@ func SequenceRemove(inp io.Reader, args []string) {
 
 		switch args[0] {
 		case "-first":
-			first = GetNumericArg(args, "Bases to delete at beginning", 0, -1, -1)
+			first = GetStringArg(args, "Bases to delete at beginning")
+			first = strings.ToUpper(first)
 			args = args[2:]
 		case "-last":
-			last = GetNumericArg(args, "Bases to delete at end", 0, -1, -1)
+			last = GetStringArg(args, "Bases to delete at end")
+			last = strings.ToUpper(last)
 			args = args[2:]
 		default:
 			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized option after -remove command\n")
@@ -4663,21 +4667,63 @@ func SequenceRemove(inp io.Reader, args []string) {
 	str := ReadAllIntoSequence(inp)
 
 	ln := len(str)
-	if first > 0 {
-		if first <= ln {
-			str = str[first:]
-			ln = len(str)
-		} else {
-			fmt.Fprintf(os.Stderr, "\nERROR: -first argument %d is greater than sequence length %d\n", first, ln)
-			str = ""
+
+	if IsAllDigits(first) {
+		val, err := strconv.Atoi(first)
+		if err == nil && val > 0 {
+			if val <= ln {
+				str = str[val:]
+				ln = len(str)
+			} else {
+				fmt.Fprintf(os.Stderr, "\nERROR: -first argument %d is greater than sequence length %d\n", val, ln)
+				str = ""
+			}
+		}
+	} else {
+		val := len(first)
+		if val > 0 {
+			if val <= ln {
+				// warn if existing sequence does not match deletion argument
+				ext := str[:val]
+				if first != ext {
+					fmt.Fprintf(os.Stderr, "\nWARNING: -first argument %s does not match existing sequence %s\n", first, ext)
+				}
+				// delete characters
+				str = str[val:]
+				ln = len(str)
+			} else {
+				fmt.Fprintf(os.Stderr, "\nERROR: -first argument %d is greater than sequence length %d\n", val, ln)
+				str = ""
+			}
 		}
 	}
-	if last > 0 {
-		if last <= ln {
-			str = str[:ln-last]
-		} else {
-			fmt.Fprintf(os.Stderr, "\nERROR: -last argument %d is greater than sequence length %d\n", last, ln)
-			str = ""
+
+	if IsAllDigits(last) {
+		val, err := strconv.Atoi(last)
+		if err == nil && val > 0 {
+			if val <= ln {
+				str = str[:ln-val]
+			} else {
+				fmt.Fprintf(os.Stderr, "\nERROR: -last argument %d is greater than remaining sequence length %d\n", val, ln)
+				str = ""
+			}
+		}
+	} else {
+		val := len(last)
+		if val > 0 {
+			if val <= ln {
+				// warn if existing sequence does not match deletion argument
+				ext := str[ln-val:]
+				if last != ext {
+					fmt.Fprintf(os.Stderr, "\nWARNING: -last argument %s does not match existing sequence %s\n", last, ext)
+				}
+				// delete characters
+				str = str[:ln-val]
+				ln = len(str)
+			} else {
+				fmt.Fprintf(os.Stderr, "\nERROR: -last argument %d is greater than sequence length %d\n", val, ln)
+				str = ""
+			}
 		}
 	}
 
@@ -4846,6 +4892,77 @@ func SequenceReplace(inp io.Reader, args []string) {
 	}
 }
 
+func SequenceExtract(inp io.Reader, args []string) {
+
+	if inp == nil {
+		return
+	}
+
+	// skip past command name
+	args = args[1:]
+
+	if len(args) < 1 {
+		fmt.Fprintf(os.Stderr, "\nERROR: Missing argument after -extract command\n")
+		os.Exit(1)
+	}
+
+	// read output of xtract -insd feat_location qualifier
+	feat_loc := args[0]
+
+	str := ReadAllIntoSequence(inp)
+
+	ln := len(str)
+
+	// split intervals, e.g., "201..224,1550..1920,1986..2085,2317..2404,2466..2629"
+	comma := strings.Split(feat_loc, ",")
+
+	for _, item := range comma {
+
+		// also allow dash separator, e.g., "201-224,1550-1920,1986-2085,2317-2404,2466-2629"
+		item = strings.Replace(item, "-", "..", -1)
+
+		fr, to := SplitInTwoAt(item, "..", LEFT)
+
+		fr = strings.TrimSpace(fr)
+		to = strings.TrimSpace(to)
+
+		min, err := strconv.Atoi(fr)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized number '%s'\n", fr)
+			os.Exit(1)
+		}
+		if min < 1 || min > ln {
+			fmt.Fprintf(os.Stderr, "\nERROR: Starting point '%s' out of range\n", fr)
+			os.Exit(1)
+		}
+
+		max, err := strconv.Atoi(to)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized number '%s'\n", to)
+			os.Exit(1)
+		}
+		if max < 1 || max > ln {
+			fmt.Fprintf(os.Stderr, "\nERROR: Ending point '%s' out of range\n", to)
+			os.Exit(1)
+		}
+
+		if min < max {
+			min--
+			sub := str[min:max]
+			os.Stdout.WriteString(sub)
+		} else if min > max {
+			max--
+			sub := str[max:min]
+			sub = ReverseComplement(sub)
+			os.Stdout.WriteString(sub)
+		} else {
+			// need more information to know strand if single point
+		}
+	}
+
+	os.Stdout.WriteString("\n")
+}
+
 // REVERSE SEQUENCE
 
 // SeqFlip reverses without complementing - e.g., minus strand proteins translated in reverse order
@@ -4958,7 +5075,7 @@ func PrintFastaPairs(frst, scnd string) {
 	// print in blocks of 50 bases or residues
 	for i := 0; i < mx; i += 50 {
 		dl := 50
-		if mx -i < 50 {
+		if mx-i < 50 {
 			dl = mx - i
 		}
 		lf := fs[:dl]
@@ -8344,6 +8461,8 @@ func main() {
 		SequenceRetain(in, args)
 	case "-replace":
 		SequenceReplace(in, args)
+	case "-extract":
+		SequenceExtract(in, args)
 	case "-revcomp":
 		NucRevComp(in)
 	case "-reverse":


=====================================
xtract.go
=====================================
@@ -276,7 +276,7 @@ Command Generator
   Descriptors      INSDSeq_sequence INSDSeq_definition INSDSeq_division
   Flags            [complete|partial]
   Feature(s)       CDS,mRNA
-  Qualifiers       INSDFeature_key "#INSDInterval" gene product sub_sequence
+  Qualifiers       INSDFeature_key "#INSDInterval" gene product feat_location sub_sequence
 
 Variation Processing
 
@@ -6340,6 +6340,66 @@ func ProcessINSD(args []string, isPipe, addDash, doIndex bool) []string {
 				acc = append(acc, "-deq", "\"\\t\"")
 			}
 
+		} else if str == "feat_location" {
+
+			// special feat_location qualifier shows feature intervals
+			acc = append(acc, "-block", "INSDFeature_intervals")
+
+			acc = append(acc, "-subset", "INSDInterval", "-FR", "INSDInterval_from", "-TO", "INSDInterval_to")
+			if isPipe {
+				acc = append(acc, "-pfx", "", "-tab", "..", "-element", "&FR")
+				acc = append(acc, "-pfx", "", "-tab", ",", "-element", "&TO")
+			} else {
+				acc = append(acc, "-pfx", "\"\"", "-tab", "\"..\"", "-element", "\"&FR\"")
+				acc = append(acc, "-pfx", "\"\"", "-tab", "\",\"", "-element", "\"&TO\"")
+			}
+
+			acc = append(acc, "-subset", "INSDFeature_intervals")
+			if isPipe {
+				acc = append(acc, "-deq", "\\t")
+			} else {
+				acc = append(acc, "-deq", "\"\\t\"")
+			}
+
+
+		} else if str == "chloroplast" ||
+			str == "chromoplast" ||
+			str == "cyanelle" ||
+			str == "environmental_sample" ||
+			str == "focus" ||
+			str == "germline" ||
+			str == "kinetoplast" ||
+			str == "macronuclear" ||
+			str == "metagenomic" ||
+			str == "mitochondrion" ||
+			str == "partial" ||
+			str == "proviral" ||
+			str == "pseudo" ||
+			str == "rearranged" ||
+			str == "ribosomal_slippage" ||
+			str == "trans_splicing" ||
+			str == "transgenic" ||
+			str == "virion" {
+
+			acc = append(acc, "-block", "INSDQualifier")
+
+			checkAgainstVocabulary(str, "qualifier", qualifiers)
+			if doIndex {
+				acc = append(acc, "-if", "INSDQualifier_name", "-equals", str)
+				acc = append(acc, "-clr", "-indices", "INSDQualifier_name")
+			} else {
+				acc = append(acc, "-if", "INSDQualifier_name", "-equals", str)
+				acc = append(acc, "-lbl", str)
+			}
+			if addDash {
+				acc = append(acc, "-block", "INSDFeature", "-unless", "INSDQualifier_name", "-equals", str)
+				if isPipe {
+					acc = append(acc, "-lbl", "\\-")
+				} else {
+					acc = append(acc, "-lbl", "\"\\-\"")
+				}
+			}
+
 		} else {
 
 			acc = append(acc, "-block", "INSDQualifier")



View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/commit/bfa1bdb45ecf116d953c9f8f19a4a7f7685931e6

-- 
View it on GitLab: https://salsa.debian.org/med-team/ncbi-entrez-direct/-/commit/bfa1bdb45ecf116d953c9f8f19a4a7f7685931e6
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210212/68f475db/attachment-0001.html>


More information about the debian-med-commit mailing list