[med-svn] [Git][med-team/kmerresistance][upstream] 2 commits: New upstream version 2.0+git20190225.39353d3
Andreas Tille
gitlab at salsa.debian.org
Mon Jun 24 07:21:48 BST 2019
Andreas Tille pushed to branch upstream at Debian Med / kmerresistance
Commits:
b44530a0 by Andreas Tille at 2019-06-24T05:52:44Z
New upstream version 2.0+git20190225.39353d3
- - - - -
fcf76aa9 by Andreas Tille at 2019-06-24T06:12:43Z
New upstream version 2.2.0
- - - - -
9 changed files:
- + .gitignore
- KmerResistance.c
- + Makefile
- README.md
- − ResFinder.fsa
- + pherror.c
- + pherror.h
- + qseqs.c
- + qseqs.h
Changes:
=====================================
.gitignore
=====================================
@@ -0,0 +1,2 @@
+*.o
+kmerresistance
=====================================
KmerResistance.c
=====================================
@@ -16,18 +16,21 @@
See the License for the specific language governing permissions and
limitations under the License.
*/
-
-#include <stdio.h>
+#define _XOPEN_SOURCE 600
+#include <ctype.h>
+#include <errno.h>
#include <math.h>
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include <time.h>
-#include <ctype.h>
-#include <errno.h>
-
+#include "pherror.h"
+#include "qseqs.h"
+#define VERSION "2.2.0"
int strpos_last(const char* str1, const char* str2) {
- char* strp;
+
+ char *strp;
int i, len1, len2;
len1 = strlen(str1);
@@ -36,48 +39,58 @@ int strpos_last(const char* str1, const char* str2) {
return -1;
}
- strp = (char*)(str1 + len1 - len2);
+ strp = (char*)(str1 + len1 - len2 + 1);
for(i = len1 - len2; i >= 0; i--) {
- if(*strp == *str2) {
- if(strncmp(strp,str2,len2)==0)
- return i;
+ if(*--strp == *str2 && strncmp(strp,str2,len2) == 0) {
+ return i;
}
- strp--;
}
return -1;
}
-long unsigned fget_to(char *line, long unsigned line_size, FILE *file, char stopChar) {
- int c, stop;
- long unsigned i;
+int fget2(Qseqs *line, FILE *file, char stopChar) {
- stop = 0;
- i = 0;
- while(!stop) {
- while(i < line_size) {
- if((c = fgetc(file)) != EOF) {
- line[i] = c;
- } else {
- line[i] = '\0';
- return line_size;
- }
- if(c != stopChar) {
- i++;
+ int c, len, size;
+ unsigned char *ptr;
+
+ len = 0;
+ size = line->size;
+ ptr = line->seq;
+ while((c = fgetc(file)) != EOF) {
+ if(++len == size) {
+ if(!(line->seq = realloc(line->seq, (size <<= 1)))) {
+ ERROR();
} else {
- break;
+ ptr = line->seq + len;
+ line->size = size;
}
}
- if(line[i] != stopChar) {
- line_size *= 2;
- line = realloc(line, line_size);
- } else {
- stop = 1;
+ if((*ptr++ = c) == stopChar) {
+ *--ptr = 0;
+ line->len = --len;
+ return 1;
}
}
- line[i] = '\0';
- return line_size;
+ *ptr = 0;
+ line->len = len;
+
+ return 0;
}
+char * stripSpace(const unsigned char *src) {
+
+ char *ptr;
+
+ ptr = (char *) src;
+ while(*ptr != 0 && isspace(*ptr)) {
+ ++ptr;
+ }
+
+ return ptr;
+}
+
+#define skipLine(file, c, stopChar) while((c = fgetc(file)) != EOF && c != stopChar)
+
void helpMessage(int exeStatus) {
FILE *helpOut;
if(exeStatus == 0) {
@@ -93,35 +106,10 @@ void helpMessage(int exeStatus) {
fprintf(helpOut, "#\t-t_db\t\tTemplate DB\t\t\t\t\t\tREQUIRED\n");
fprintf(helpOut, "#\t-s_db\t\tSpecies DB\t\t\t\t\t\tREQUIRED\n");
fprintf(helpOut, "#\t-id\t\tID threshhold\t\t\t70.0\n");
+ fprintf(helpOut, "#\t-hid\t\tHard ID threshhold\t\t\t1.0\n");
fprintf(helpOut, "#\t-dct\t\tDepth correction threshhold\t0.1\n");
fprintf(helpOut, "#\t-kma\t\talternative KMA\t\tkma\n");
- fprintf(helpOut, "#\n# KMA options:\t\tDesc:\t\t\t\tDefault:\t\tRequirements:\n");
- fprintf(helpOut, "#\t-i\t\tInput/query file name\t\tNone\t\tREQUIRED\n");
- fprintf(helpOut, "#\t-o\t\tOutput file\t\t\tNone\t\tREQUIRED\n");
- fprintf(helpOut, "#\t-t_db\t\tTemplate DB\t\t\tNone\t\tREQUIRED\n");
- fprintf(helpOut, "#\t-k\t\tKmersize\t\t\t16\n");
- fprintf(helpOut, "#\t-e\t\tevalue\t\t\t\t0.05\n");
- fprintf(helpOut, "#\t-delta\t\tAlign in pieces of delta\t511\n");
- fprintf(helpOut, "#\t-mem_mode\tUse kmers to choose best\n#\t\t\ttemplate, and save memory\tFalse\n");
- fprintf(helpOut, "#\t-ex_mode\tSearh kmers exhaustively\tFalse\n");
- fprintf(helpOut, "#\t-deCon\t\tRemove contamination\t\tFalse\n");
- fprintf(helpOut, "#\t-dense\t\tDo not allow insertions\n#\t\t\tin assembly\t\t\tFalse\n");
- fprintf(helpOut, "#\t-ref_fsa\tConsensus sequnce will\n#\t\t\thave \"n\" instead of gaps\tFalse\n");
- fprintf(helpOut, "#\t-matrix\t\tPrint assembly matrix\t\tFalse\n");
- fprintf(helpOut, "#\t-mp\t\tMinimum phred score\t\t30\n");
- fprintf(helpOut, "#\t-5p\t\tCut a constant number of\n#\t\t\tnucleotides from the 5 prime.\t0\n");
- fprintf(helpOut, "#\t-Sparse\t\tRun KmerFinder\t\t\tFalse\n");
- fprintf(helpOut, "#\t-ID\t\tMinimum ID\t\t\t1.0%%\n");
- fprintf(helpOut, "#\t-ss\t\tSparse sorting (q,c,d)\t\tq\n");
- fprintf(helpOut, "#\t-shm\t\tUse shared DB made by kma_shm\t0 (lvl)\n");
- fprintf(helpOut, "#\t-swap\t\tSwap DB to disk\t\t\t0 (lvl)\n");
- fprintf(helpOut, "#\t-1t1\t\tSkip HMM\t\t\tFalse\n");
- fprintf(helpOut, "#\t-boot\t\tBootstrap sequence\t\tFalse\n");
- fprintf(helpOut, "#\t-mrs\t\tMinimum alignment score,\n#\t\t\tnormalized to alignment length\t0.5\n");
- fprintf(helpOut, "#\t-reward\t\tScore for match\t\t\t1\n");
- fprintf(helpOut, "#\t-penalty\tPenalty for mismatch\t\t-2\n");
- fprintf(helpOut, "#\t-gapopen\tPenalty for gap opening\t\t-3\n");
- fprintf(helpOut, "#\t-gapextend\tPenalty for gap extension\t-1\n");
+ fprintf(helpOut, "#\t-v\t\tVersion\n");
fprintf(helpOut, "#\t-h\t\tShows this help message\n");
fprintf(helpOut, "#\n");
exit(exeStatus);
@@ -129,22 +117,22 @@ void helpMessage(int exeStatus) {
int main(int argc, char *argv[]) {
- int i, args, KMA_call_len, pos, line_size, out_len, HMM, stop;
- char *outputfilename, *t_db, *s_db, *KMA_dest, *KMA_call, *line, *q_id;
- char *template_name, *score, *expected, *t_len, *q, *p, *t_cov, *q_cov;
- double depth_corr_t, ID_t, ID, depth, depth_corr, expDepth;
+ int args, KMA_call_len, out_len, stop;
+ char *outputfilename, *t_db, *s_db, *KMA_dest, *KMA_call, *exeBasic;
+ double depth_corr_t, ID_t, HID_t, ID, depth, depth_corr, expDepth;
FILE *output, *input;
+ Qseqs *line, *template_name, *score, *expected, *t_len, *q, *p, *t_cov;
+ Qseqs *q_cov, *q_id;
+
/* set defaults */
depth_corr_t = 0.1;
ID_t = 70.0;
+ HID_t = 1.0;
outputfilename = 0;
t_db = 0;
s_db = 0;
- KMA_dest = strdup("kma");
- if(!KMA_dest) {
- fprintf(stderr, "OOM\n");
- exit(-1);
- }
+ KMA_dest = smalloc(4);
+ strcpy(KMA_dest, "kma");
KMA_call_len = 11;
/* parse cmd-line */
@@ -155,7 +143,10 @@ int main(int argc, char *argv[]) {
if(args < argc) {
KMA_call_len += (strlen(argv[args]) + 4);
out_len = strlen(argv[args]);
- outputfilename = (char*) calloc(out_len + strlen(".KmerRes") + 1, sizeof(char));
+ outputfilename = (char*) calloc(out_len + 16, sizeof(char));
+ if(!outputfilename) {
+ ERROR();
+ }
strcpy(outputfilename, argv[args]);
strcat(outputfilename, ".KmerRes");
}
@@ -163,6 +154,9 @@ int main(int argc, char *argv[]) {
args++;
if(args < argc) {
t_db = (char*) calloc(strlen(argv[args]) + 1, sizeof(char));
+ if(!t_db) {
+ ERROR();
+ }
strcpy(t_db, argv[args]);
KMA_call_len += (strlen(argv[args]) + 1);
}
@@ -170,26 +164,47 @@ int main(int argc, char *argv[]) {
args++;
if(args < argc) {
s_db = (char*) calloc(strlen(argv[args]) + 1, sizeof(char));
+ if(!s_db) {
+ ERROR();
+ }
strcpy(s_db, argv[args]);
KMA_call_len += (strlen(argv[args]) + 1);
}
} else if(strcmp(argv[args], "-id") == 0) {
args++;
if(args < argc) {
- ID_t = atof(argv[args]);
+ ID_t = strtod(argv[args], &exeBasic);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "%s\n", exeBasic);
+ exit(1);
+ } else if(ID_t < 0) {
+ fprintf(stderr, "Invalid threshold\n");
+ exit(1);
+ }
}
- if(ID_t < 0) {
- fprintf(stderr, "Invalid threshold\n");
- exit(1);
+ } else if(strcmp(argv[args], "-hid") == 0) {
+ args++;
+ if(args < argc) {
+ HID_t = strtod(argv[args], &exeBasic);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "%s\n", exeBasic);
+ exit(1);
+ } else if(HID_t < 0) {
+ fprintf(stderr, "Invalid threshold\n");
+ exit(1);
+ }
}
} else if(strcmp(argv[args], "-dct") == 0) {
args++;
if(args < argc) {
- depth_corr_t = atof(argv[args]);
- }
- if(depth_corr_t < 0) {
- fprintf(stderr, "Invalid threshold\n");
- exit(1);
+ depth_corr_t = strtod(argv[args], &exeBasic);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "%s\n", exeBasic);
+ exit(1);
+ } else if(depth_corr_t < 0) {
+ fprintf(stderr, "Invalid threshold\n");
+ exit(1);
+ }
}
} else if(strcmp(argv[args], "-kma") == 0) {
args++;
@@ -197,24 +212,24 @@ int main(int argc, char *argv[]) {
free(KMA_dest);
KMA_dest = strdup(argv[args]);
if(!KMA_dest) {
- fprintf(stderr, "OOM\n");
- exit(-1);
+ fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ exit(errno);
}
KMA_call_len += strlen(argv[args]);
}
+ } else if(strcmp(argv[args], "-v") == 0) {
+ fprintf(stderr, "KmerResistance-%s\n", VERSION);
+ exit(0);
} else if(strcmp(argv[args], "-h") == 0) {
helpMessage(0);
} else {
KMA_call_len += strlen(argv[args]) + 1;
- if(HMM == 0 && strcmp(argv[args], "-i") == 0) {
+ if(strcmp(argv[args], "-i") == 0) {
stop = 0;
args++;
while(stop == 0 && args < argc) {
- if(strncmp(argv[args], "-", 1) != 0) {
+ if(*(argv[args]) != '-') {
KMA_call_len += strlen(argv[args]) + 1;
- if(strpos_last(argv[args], "fsa") != -1 || strpos_last(argv[args], "fna") != -1 || strpos_last(argv[args], "fasta") != -1) {
- HMM = 1;
- }
args++;
} else {
stop = 1;
@@ -231,18 +246,12 @@ int main(int argc, char *argv[]) {
helpMessage(1);
}
- line_size = 4096 * sizeof(char);
- line = malloc(line_size);
-
/* prepare KMA alignment */
- KMA_call_len += 5;
- pos = strpos_last(argv[0], "/") + 1;
- KMA_call = calloc(KMA_call_len + pos + 2, sizeof(char));
+ KMA_call_len += 8;
+ KMA_call = smalloc(KMA_call_len + strpos_last(*argv, "/") + 4);
/* add options to KMA */
- *KMA_call = 0;
- strcat(KMA_call, KMA_dest);
- strcat(KMA_call, " ");
+ KMA_call_len = sprintf(KMA_call, "%s ", KMA_dest);
args = 1;
while(args < argc) {
if(strcmp(argv[args], "-t_db") == 0) {
@@ -255,137 +264,126 @@ int main(int argc, char *argv[]) {
args++;
} else if(strcmp(argv[args], "-kma") == 0) {
args++;
+ } else if(strcmp(argv[args], "-Sparse") == 0) {
+ args++;
} else if(strcmp(argv[args], "-h") != 0) {
- strcat(KMA_call, argv[args]);
- strcat(KMA_call, " ");
+ KMA_call_len += sprintf(KMA_call + KMA_call_len, "%s ", argv[args]);
}
args++;
}
- strcat(KMA_call, "-t_db ");
+ KMA_call_len += sprintf(KMA_call + KMA_call_len, "-t_db ");
- /* mapp genes */
- fprintf(stderr, "# Mapping gene data.\n");
- KMA_call_len = strlen(KMA_call);
+ /* map genes */
+ fprintf(stderr, "# Aligning gene data.\n");
sprintf(KMA_call, "%s%s", KMA_call, t_db);
- //fprintf(stderr, "%s\n", KMA_call);
- input = popen(KMA_call, "r");
- if(!input) {
- fprintf(stderr, "Invalid KMA specified\n");
- exit(-1);
- }
- if(pclose(input) != 0) {
- fprintf(stderr, "# Gene mapping failed\n");
+ if(system(KMA_call) != 0) {
+ fprintf(stderr, "# Gene alignment failed\n");
exit(1);
}
- KMA_call[KMA_call_len] = '\0';
+ KMA_call[KMA_call_len] = 0;
- /* mapp species */
+ /* map species */
fprintf(stderr, "# Finding species\n");
- sprintf(KMA_call, "%s%s -Sparse\n", KMA_call, s_db);
- KMA_call[strpos_last(KMA_call, "\n")] = '\0';
- input = popen(KMA_call, "r");
- if(!input) {
- fprintf(stderr, "Invalid KMA specified\n");
- exit(-1);
- }
- if(pclose(input) != 0) {
+ sprintf(KMA_call, "%s%s -Sparse", KMA_call, s_db);
+ if(system(KMA_call) != 0) {
fprintf(stderr, "# Species mapping failed\n");
exit(1);
}
- KMA_call[KMA_call_len] = '\0';
+ KMA_call[KMA_call_len] = 0;
/* correlate species */
fprintf(stderr, "# Mapping done\n");
fprintf(stderr, "# Correlating results\n");
- template_name = malloc(256 * sizeof(char));
- score = malloc(256 * sizeof(char));
- expected = malloc(256 * sizeof(char));
- t_len = malloc(256 * sizeof(char));
- q = malloc(256 * sizeof(char));
- p = malloc(256 * sizeof(char));
- t_cov = malloc(256 * sizeof(char));
- q_id = malloc(256 * sizeof(char));
- q_cov = malloc(256 * sizeof(char));
-
- output = fopen(outputfilename, "w");
- if(!output) {
- fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
- exit(-1);
- }
+ line = setQseqs(256);
+ template_name = setQseqs(256);
+ score = setQseqs(32);
+ expected = setQseqs(32);
+ t_len = setQseqs(32);
+ q = setQseqs(32);
+ p = setQseqs(32);
+ t_cov = setQseqs(32);
+ q_id = setQseqs(32);
+ q_cov = setQseqs(32);
+ output = sfopen(outputfilename, "wb");
fprintf(output, "#Template\tScore\tExpected\ttemplate length\tq_value\tp_value\ttemplate_id\ttemplate_coverage\tquery_id\tquery_coverage\tdepth\tdepth_corr\n");
- strncpy((outputfilename + out_len), ".spa", 4);
- outputfilename[out_len + 4] = '\0';
- input = fopen(outputfilename, "r");
- if(!output) {
- fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
- exit(-1);
- }
+ strcpy((outputfilename + out_len), ".spa");
+ input = sfopen(outputfilename, "rb");
/* retrieve species information */
- fgets(line, line_size, input);
- fget_to(template_name, 256, input, '\t');
- line_size = fget_to(line, line_size, input, '\t');
- fget_to(score, 256, input, '\t');
- fget_to(expected, 256, input, '\t');
- fget_to(t_len, 256, input, '\t');
-
- fget_to(q_cov, 256, input, '\t');
-
- fget_to(t_cov, 256, input, '\t');
- ID = atof(t_cov);
-
- line_size = fget_to(line, line_size, input, '\t');
- expDepth = atof(line);
- line_size = fget_to(line, line_size, input, '\t');
- line_size = fget_to(line, line_size, input, '\t');
- line_size = fget_to(line, line_size, input, '\t');
- fget_to(q, 256, input, '\t');
- fget_to(p, 256, input, '\n');
- ID_t *= (1 - pow(2.718281828, (-0.5) * expDepth));
- fprintf(output, "%s\t%s\t%s\t%s\t%s\t%s\t%3.2f\t%s\t%s\t%s\t%5.2f\t%1.4f\n", template_name, score, expected, t_len, q, p, ID, t_cov, q_cov, q_cov, expDepth, (1 - pow(2.718281828, -1)));
+ skipLine(input, args, '\n');
+ fget2(template_name, input, '\t');
+ if(template_name->len) {
+ skipLine(input, args, '\t');
+ fget2(score, input, '\t');
+ fget2(expected, input, '\t');
+ fget2(t_len, input, '\t');
+ fget2(q_cov, input, '\t');
+ fget2(t_cov, input, '\t');
+ ID = strtod(stripSpace(t_cov->seq), &exeBasic);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "%s\n", exeBasic);
+ exit(1);
+ }
+
+ fget2(line, input, '\t');
+ expDepth = strtod(stripSpace(line->seq), &exeBasic);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "%s\n", exeBasic);
+ exit(1);
+ }
+ skipLine(input, args, '\t');
+ skipLine(input, args, '\t');
+ skipLine(input, args, '\t');
+ fget2(q, input, '\t');
+ fget2(p, input, '\n');
+ ID_t *= (1 - pow(2.718281828, (-0.5) * expDepth));
+ ID_t = ID_t < HID_t ? HID_t : ID_t;
+ fprintf(output, "%s\t%s\t%s\t%s\t%s\t%s\t%3.2f\t%s\t%s\t%s\t%5.2f\t%1.4f\n", template_name->seq, score->seq, expected->seq, t_len->seq, q->seq, p->seq, ID, t_cov->seq, q_cov->seq, q_cov->seq, expDepth, (1 - pow(2.718281828, -1)));
+ } else {
+ fprintf(stderr, "Could not determine host.\n");
+ expDepth = 1.0e-6;
+ ID_t = HID_t;
+ }
fclose(input);
- fprintf(stderr, "# Adjusted ID threshold: %f\n", ID_t);
+ fprintf(stderr, "# Adjusted ID threshold: %.2f\n", ID_t);
/* retrieve template information */
- strncpy((outputfilename + out_len), ".res", 4);
- outputfilename[out_len + 4] = '\0';
- input = fopen(outputfilename, "r");
- if(!input) {
- fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
- exit(-1);
- }
-
- fgets(line, line_size, input);
- while(! feof(input)) {
- fget_to(template_name, 256, input, '\t');
- fget_to(score, 256, input, '\t');
- fget_to(expected, 256, input, '\t');
- fget_to(t_len, 256, input, '\t');
- line_size = fget_to(line, line_size, input, '\t');
- ID = atof(line);
-
- fget_to(t_cov, 256, input, '\t');
- fget_to(q_id, 256, input, '\t');
- fget_to(q_cov, 256, input, '\t');
-
-
-
- line_size = fget_to(line, line_size, input, '\t');
- depth = atof(line);
- fget_to(q, 256, input, '\t');
- fget_to(p, 256, input, '\n');
- //fgets(line, line_size, input);
+ strcpy((outputfilename + out_len), ".res");
+ input = sfopen(outputfilename, "rb");
+ skipLine(input, args, '\n');
+ while(!feof(input)) {
+ fget2(template_name, input, '\t');
+ fget2(score, input, '\t');
+ fget2(expected, input, '\t');
+ fget2(t_len, input, '\t');
+ fget2(line, input, '\t');
+ ID = strtod(stripSpace(line->seq), &exeBasic);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "%s\n", exeBasic);
+ exit(1);
+ }
+ fget2(t_cov, input, '\t');
+ fget2(q_id, input, '\t');
+ fget2(q_cov, input, '\t');
+ fget2(line, input, '\t');
+ depth = strtod(stripSpace(line->seq), &exeBasic);
+ if(*exeBasic != 0) {
+ fprintf(stderr, "%s\n", exeBasic);
+ exit(1);
+ }
+ fget2(q, input, '\t');
+ fget2(p, input, '\n');
depth_corr = (1 - pow(2.718281828, (-1) * depth / expDepth));
- if(depth_corr > depth_corr_t && ID > ID_t) {
- fprintf(output, "%s\t%s\t%s\t%s\t%s\t%s\t%3.2f\t%s\t%s\t%s\t%5.2f\t%1.4f\n", template_name, score, expected, t_len, q, p, ID, t_cov, q_id, q_cov, depth, depth_corr);
+ if(depth_corr_t <= depth_corr && ID_t <= ID) {
+ fprintf(output, "%s\t%s\t%s\t%s\t%s\t%s\t%3.2f\t%s\t%s\t%s\t%5.2f\t%1.4f\n", template_name->seq, score->seq, expected->seq, t_len->seq, q->seq, p->seq, ID, t_cov->seq, q_id->seq, q_cov->seq, depth, depth_corr);
}
}
fclose(input);
-
- fprintf(stdout, "DONE");
fclose(output);
- exit(0);
+ fprintf(stderr, "# DONE\n");
+
+ return 0;
}
=====================================
Makefile
=====================================
@@ -0,0 +1,17 @@
+CFLAGS = -Wall -O3 -std=c99
+BINS = kmerresistance
+LIBS = pherror.o qseqs.o
+
+.c .o:
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+all: $(BINS)
+
+kmerresistance: KmerResistance.c $(LIBS)
+ $(CC) $(CFLAGS) -o $@ $< $(LIBS) -lm
+
+clean:
+ $(RM) $(BINS) $(LIBS)
+
+pherror.o: pherror.h
+qseqs.o: qseqs.h pherror.h
=====================================
README.md
=====================================
@@ -1,121 +1,80 @@
-/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+# Getting Started #
+```
+git clone https://bitbucket.org/genomicepidemiology/kmerresistance.git
+cd kmerresistance && make;
-/*
- Copyright (c) 2017, Philip Clausen, Technical University of Denmark
- All rights reserved.
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-*/
-
-
-Introduction
+./kmerresistance -i reads_se.fq.gz -o output/name -t_db templates -s_db species
+./kmerresistance -ipe reads_1.fq.gz reads_2.fq.gz -o output/name -t_db templates -s_db species
+```
+# Introduction #
KmerResistance correlates mapped genes with the predicted species of WGS
samples, where this this allows for identification of genes in samples which
have been poorly sequenced or high accuracy predictions for samples with
contamination.
KmerResistance has one dependency, namely KMA to perform the mapping, which is
also freely available.
-
-If you use KmerResistance for your published research, then please cite the
-KmerResistance and KMA paper.
-
-
-Compilation
-
-Any c-compiler can be used, here GCC.
-gcc -O3 -o kmerresistance KmerResistance.c -lm
-
-
-Indexing
+If you use KmerResistance for your published research, then please cite:
+Philip T.L.C. Clausen, Ea Zankari, Frank M. Aarestrup & Ole Lund,
+"Benchmarking of methods for identification of antimicrobial resistance genes in bacterial whole genome data",
+J Antimicrob Chemother. 2016 Sep;71(9):2484-8.
+Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund,
+"Rapid and precise alignment of raw reads against redundant databases with KMA",
+BMC Bioinformatics, 2018;19:307.
+
+# Indexing #
Databases has to set up with KMA. The resistance genes needs standard indexing
while the species database needs to Sparse.
-# Templates, e.g. resistance genes:
-kma_index -i ResFinder.fsa -o ResFinder
-
-# Species database (or other host database):
-kma_index -i bacteria.fsa -o bacteria -Sparse ATG
-
-
-Mapping with KmerResistance
-
-# KmerResistance-2.0 correlates genes to species from WGS data, by mapping reads with KMA
-# Options are: Desc: Default: Requirements:
-#
-# -i Input/query file name REQUIRED
-# -o Output file REQUIRED
-# -t_db Template DB REQUIRED
-# -s_db Species DB REQUIRED
-# -t_k Template kmersize 16
-# -s_k Species kmersize 16
-# -id ID threshhold 80.0
-# -dct Depth correction 0.1
-# -kma alternative KMA kma
-#
-# KMA options: Desc: Default: Requirements:
-# -i Input/query file name None REQUIRED
-# -o Output file None REQUIRED
-# -t_db Template DB None REQUIRED
-# -k Kmersize 16
-# -e evalue 0.05
-# -delta Allocation size for sequences 512
-# -mem_mode Use kmers to choose best
-# template, and save memory False
-# -ex_mode Searh kmers exhaustively False
-# -deCon Remove contamination False
-# -dense Do not allow insertions
-# in assembly False
-# -ref_fsa Consensus sequnce will
-# have "n" instead of gaps False
-# -matrix Print assembly matrix False
-# -mp Minimum phred score 30
-# -5p Cut a constant number of
-# nucleotides from the 5 prime. 0
-# -CS Chain size 8 MB
-# -MS Max chain size 14 GB
-# -Sparse Run KmerFinder False
-# -ID Minimum ID 1.0%
-# -ss Sparse sorting (q,c,d) q
-# -NW Use Needleman-Wunsch False
-# -shm Use shared DB made by kma_ssm False
-# -1t1 Skip HMM False
-# -mrs Minimum alignment score score,
-# normalized to alignment length 0.0
-# -h Shows this help message
-#
-
-
-Mapping reads against resistance genes:
-kmerresistance -i sample_1.fastq sample_2.fastq -o out /
--t_db ResFinder -s_db bacteria
-
-
-Output:
-*.KmerRes KmerResistance output
-*.res Result file, containing summary of output.
-*.aln Consensus alignment.
-*.fsa Consensus sequences drawn from mappings.
-*.frag Information about each mapping read, containing:
- Read, #matches, aln score, start, end, template, read name
-*.mat.gz Count of each called nucleotide on each position in all mapped
- templates, requires that the "-matrix" option is enabled when
- mapping. The columns are:
- Ref. nucleotide, #A, #T, #C, #G, #N, #-.
-
-
-Questions
-If you have any questions regarding KmerResistance or KMA, or wishes for
-changes in future versions of KmerResistance or KMA, then feel free to mail me
-at: plan at dtu.dk.
+# Templates, e.g. resistance genes: #
+```
+kma index -i ResFinder.fsa -o ResFinder
+```
+
+# Species database (or other host database): #
+```
+kma index -i bacteria.fsa -o bacteria -Sparse ATG
+```
+
+# Gene identification #
+Aligning reads against resistance genes:
+```
+kmerresistance -i sample_1.fastq sample_2.fastq -o out -t_db ResFinder -s_db bacteria
+```
+
+# Output: #
+1. *.KmerRes KmerResistance output
+2. *.res Result file, containing summary of output.
+3. *.aln Consensus alignment.
+4. *.fsa Consensus sequences drawn from mappings.
+5. *.frag Information about each mapping read, containing: Read, #matches, aln score, start, end, template, read name
+6. *.mat.gz Count of each called nucleotide on each position in all mapped templates, requires that the "-matrix" option is enabled when mapping. The columns are: Ref. nucleotide, #A, #T, #C, #G, #N, #-.
+
+# Installation Requirements #
+In order to run KmerResistance, you will need to install KMA. Available here:
+*https://bitbucket.org/genomicepidemiology/kma.git*
+
+# Help #
+Usage and options are available with the "-h" option on all three programs.
+If in doubt, please mail any concerns or problems to: *plan at dtu.dk*.
+
+# Citation #
+1. Philip T.L.C. Clausen, Ea Zankari, Frank M. Aarestrup & Ole Lund, "Benchmarking of methods for identification of antimicrobial resistance genes in bacterial whole genome data", J Antimicrob Chemother. 2016 Sep;71(9):2484-8.
+2. Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund, "Rapid and precise alignment of raw reads against redundant databases with KMA", BMC Bioinformatics, 2018;19:307.
+
+# License #
+Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
=====================================
ResFinder.fsa deleted
=====================================
The diff for this file was not included because it is too large.
=====================================
pherror.c
=====================================
@@ -0,0 +1,82 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+#define _XOPEN_SOURCE 600
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "pherror.h"
+
+void * smalloc(const size_t size) {
+
+ void *dest = malloc(size);
+ if(!dest) {
+ ERROR();
+ }
+
+ return dest;
+}
+
+FILE * sfopen(char *filename, char *mode) {
+
+ FILE *file = fopen(filename, mode);
+ if(!file) {
+ fprintf(stderr, "Filename:\t%s\n", filename);
+ ERROR();
+ }
+
+ return file;
+}
+
+void cfread(void *src, size_t size, size_t nmemb, FILE *stream) {
+
+ unsigned char *ptr = src;
+ nmemb = fread(ptr, 1, (size *= nmemb), stream);
+ if((size -= nmemb)) {
+ ptr += nmemb;
+ while(size) {
+ if(0 < (nmemb = read(fileno(stream), ptr, size))) {
+ size -= nmemb;
+ ptr += nmemb;
+ } else if(nmemb == 0 || (nmemb == -1 && (errno & EAGAIN))) {
+ usleep(1000);
+ } else {
+ ERROR();
+ }
+ }
+ }
+}
+
+void cfwrite(const void *src, size_t size, size_t nmemb, FILE *stream) {
+
+ unsigned char *ptr;
+
+ size *= nmemb;
+ ptr = (unsigned char *)(src);
+ while(size) {
+ nmemb = fwrite(ptr, 1, size, stream);
+ if(nmemb == 0) {
+ ERROR();
+ }
+ size -= nmemb;
+ ptr += nmemb;
+ }
+
+}
=====================================
pherror.h
=====================================
@@ -0,0 +1,37 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#define _XOPEN_SOURCE 600
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+/* errno error message */
+#define ERROR() fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno)); exit(errno);
+
+/* succes or exit */
+#define sfwrite(ptr, size, nmemb, stream) if(fwrite(ptr, size, nmemb, stream) != nmemb) {if(errno) {ERROR();} else {fprintf(stderr, "Writing error.\n"); exit(1);}}
+#define sfread(ptr, size, nmemb, stream) if(fread(ptr, size, nmemb, stream) != nmemb) {if(errno) {ERROR();} else {fprintf(stderr, "Reading error.\n"); exit(1);}}
+void * smalloc(const size_t size);
+FILE * sfopen(char *filename, char *mode);
+
+/* cyclic */
+void cfread(void *src, size_t size, size_t nmemb, FILE *stream);
+void cfwrite(const void *src, size_t size, size_t nmemb, FILE *stream);
=====================================
qseqs.c
=====================================
@@ -0,0 +1,39 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <stdlib.h>
+#include "pherror.h"
+#include "qseqs.h"
+
+Qseqs * setQseqs(int size) {
+
+ Qseqs *dest;
+
+ dest = smalloc(sizeof(Qseqs));
+ dest->len = 0;
+ dest->size = size;
+ dest->seq = smalloc(size);
+
+ return dest;
+}
+
+void destroyQseqs(Qseqs *dest) {
+ free(dest->seq);
+ free(dest);
+}
=====================================
qseqs.h
=====================================
@@ -0,0 +1,33 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#ifndef QSEQS
+typedef struct qseqs Qseqs;
+struct qseqs {
+ int size;
+ int len;
+ unsigned char *seq;
+};
+#define QSEQS 1
+#endif
+
+/* initialize Qseqs */
+Qseqs * setQseqs(int size);
+/* destroy Qseqs */
+void destroyQseqs(Qseqs *dest);
View it on GitLab: https://salsa.debian.org/med-team/kmerresistance/compare/cf965b48201cb0d9df90b33700e1b4680829f5cd...fcf76aa9b300b07c5b58aef039c553b0936cca0d
--
View it on GitLab: https://salsa.debian.org/med-team/kmerresistance/compare/cf965b48201cb0d9df90b33700e1b4680829f5cd...fcf76aa9b300b07c5b58aef039c553b0936cca0d
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190624/4d66b230/attachment-0001.html>
More information about the debian-med-commit
mailing list