[med-svn] [Git][med-team/kma][master] 7 commits: Update upstream source from tag 'upstream/0.14.4+git20190102.6da80f4'
Andreas Tille
gitlab at salsa.debian.org
Tue Feb 19 16:08:15 GMT 2019
Andreas Tille pushed to branch master at Debian Med / kma
Commits:
02aede24 by Andreas Tille at 2019-02-13T18:20:24Z
Update upstream source from tag 'upstream/0.14.4+git20190102.6da80f4'
Update to upstream version '0.14.4+git20190102.6da80f4'
with Debian dir d4f2ad6089b0b18e59d10f95d9902987507f86a1
- - - - -
a86e157b by Andreas Tille at 2019-02-13T18:20:24Z
New upstream version 0.14.4+git20190102.6da80f4
- - - - -
ab7cba95 by Andreas Tille at 2019-02-13T18:25:25Z
Upstream is now tagging releases
- - - - -
133694bd by Andreas Tille at 2019-02-13T18:43:03Z
New upstream version 1.1.7
- - - - -
54b86713 by Andreas Tille at 2019-02-13T18:43:03Z
Update upstream source from tag 'upstream/1.1.7'
Update to upstream version '1.1.7'
with Debian dir bd53c6b43293d1faf38501dbb0489a72c46f8557
- - - - -
8408387a by Andreas Tille at 2019-02-13T19:13:50Z
Adapt patch
- - - - -
e8f48865 by Andreas Tille at 2019-02-19T16:07:20Z
Refresh manpages
- - - - -
13 changed files:
- KMA.c
- KMA_SHM.c
- KMA_index.c
- + KMA_update.c
- Makefile
- README.md
- debian/changelog
- debian/control
- debian/kma.1
- debian/kma_index.1
- debian/kma_shm.1
- debian/patches/hardening.patch
- debian/watch
Changes:
=====================================
KMA.c
=====================================
The diff for this file was not included because it is too large.
=====================================
KMA_SHM.c
=====================================
@@ -1,212 +1,261 @@
/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
/*
- Copyright (c) 2017, Philip Clausen, Technical University of Denmark
- All rights reserved.
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-*/
-
-#include <stdio.h>
+#include <ctype.h>
+#include <errno.h>
#include <math.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <time.h>
-#include <ctype.h>
-#include <sys/types.h>
-#include <sys/stat.h>
#include <sys/ipc.h>
#include <sys/shm.h>
+#include <time.h>
#include <unistd.h>
-#include <errno.h>
+
+#define HU_LIMIT 65535
+#define U_LIMIT 4294967295
/*
STRUCTURES
*/
struct hashMapKMA {
- /* end product of script */
- unsigned kmersize; // k
- unsigned size; // size of DB
- unsigned n; // k-mers stored
- unsigned null_index; // null value
- unsigned seqsize; // size of seq
- unsigned v_index; // size of values
- unsigned prefix_len; // prefix length
- long unsigned *prefix; // prefix
- unsigned *exist; // size long
- long unsigned *seq; // compressed sequence of k-mers
- unsigned *values; // compressed values
- unsigned *key_index ; // Relative
- unsigned *value_index; // Relative
+ long unsigned size; // size of DB
+ long unsigned n; // k-mers stored
+ long unsigned null_index; // null value
+ long unsigned v_index; // size of values
+ unsigned kmersize; // k
+ unsigned prefix_len; // prefix length
+ long unsigned prefix; // prefix
+ unsigned *exist; // size long
+ long unsigned *exist_l; // size long, big DBs
+ unsigned *values; // compressed values
+ short unsigned *values_s; // compressed values, few templates
+ unsigned *key_index; // Relative
+ long unsigned *key_index_l; // Relative, 16 < k
+ unsigned *value_index; // Relative
+ long unsigned *value_index_l; // Relative, big DBs
};
+int version[3] = {1, 0, 0};
+
/*
FUNCTIONS
*/
void hashMap_shm_detach(struct hashMapKMA *dest) {
shmdt(dest->exist);
- shmdt(dest->seq);
shmdt(dest->values);
shmdt(dest->key_index);
shmdt(dest->value_index);
}
-void hashMapKMA_setupSHM(struct hashMapKMA *dest, FILE *file, const char *filename) {
+int hashMapKMA_setupSHM(struct hashMapKMA *dest, FILE *file, const char *filename) {
- int shmid;
- long unsigned mask;
+ int shmid, kmersize, status;
+ unsigned DB_size;
+ long unsigned mask, size;
key_t key;
/* load sizes */
- fseek(file, sizeof(int), SEEK_CUR);
+ fread(&DB_size, sizeof(unsigned), 1, file);
fread(&dest->kmersize, sizeof(unsigned), 1, file);
fread(&dest->prefix_len, sizeof(unsigned), 1, file);
fread(&dest->prefix, sizeof(long unsigned), 1, file);
fread(&dest->size, sizeof(long unsigned), 1, file);
- fread(&dest->n, sizeof(unsigned), 1, file);
- fread(&dest->seqsize, sizeof(unsigned), 1, file);
- fread(&dest->v_index, sizeof(unsigned), 1, file);
- fread(&dest->null_index, sizeof(unsigned), 1, file);
+ fread(&dest->n, sizeof(long unsigned), 1, file);
+ fread(&dest->v_index, sizeof(long unsigned), 1, file);
+ fread(&dest->null_index, sizeof(long unsigned), 1, file);
+ kmersize = dest->kmersize;
+ mask = 0;
+ mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
+ status = 0;
/* check shared memory, else load */
+ size = dest->size;
+ if((dest->size - 1) == mask) {
+ if(dest->v_index <= U_LIMIT) {
+ size *= sizeof(unsigned);
+ } else {
+ size *= sizeof(long unsigned);
+ }
+ } else {
+ if(dest->n <= U_LIMIT) {
+ size *= sizeof(unsigned);
+ } else {
+ size *= sizeof(long unsigned);
+ }
+ }
key = ftok(filename, 'e');
- shmid = shmget(key, dest->size * sizeof(unsigned), IPC_CREAT | 0666);
+ shmid = shmget(key, size, IPC_CREAT | 0666);
if(shmid < 0) {
fprintf(stderr, "Could not setup the shared hashMap e\n");
- fseek(file, dest->size * sizeof(unsigned), SEEK_CUR);
+ fseek(file, size, SEEK_CUR);
dest->exist = 0;
+ status = 1;
} else {
dest->exist = shmat(shmid, NULL, 0);
- fread(dest->exist, sizeof(unsigned), dest->size, file);
+ fread(dest->exist, 1, size, file);
}
- mask = 0;
- mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (dest->kmersize << 1));
-
+ /* values */
+ size = dest->v_index;
+ if(DB_size < HU_LIMIT) {
+ size *= sizeof(short unsigned);
+ } else {
+ size *= sizeof(unsigned);
+ }
+ key = ftok(filename, 'v');
+ shmid = shmget(key, size, IPC_CREAT | 0666);
+ if(shmid < 0) {
+ fprintf(stderr, "Could not setup the shared hashMap v\n");
+ fseek(file, size, SEEK_CUR);
+ dest->values = 0;
+ status = 1;
+ } else {
+ /* found */
+ dest->values = shmat(shmid, NULL, 0);
+ fread(dest->values, 1, size, file);
+ }
if((dest->size - 1) == mask) {
- key = ftok(filename, 'v');
- shmid = shmget(key, dest->v_index * sizeof(unsigned), IPC_CREAT | 0666);
- if(shmid < 0) {
- fprintf(stderr, "Could not setup the shared hashMap v\n");
- fseek(file, dest->v_index * sizeof(unsigned), SEEK_CUR);
- dest->values = 0;
- } else {
- /* found */
- dest->values = shmat(shmid, NULL, 0);
- fread(dest->values, sizeof(unsigned), dest->v_index, file);
- }
+ return status;
+ }
+
+ /* kmers */
+ size = dest->n + 1;
+ if(dest->kmersize <= 16) {
+ size *= sizeof(unsigned);
} else {
- key = ftok(filename, 's');
- shmid = shmget(key, dest->seqsize * sizeof(long unsigned), IPC_CREAT | 0666);
- if(shmid < 0) {
- fprintf(stderr, "Could not setup the shared hashMap s\n");
- fseek(file, dest->seqsize * sizeof(long unsigned), SEEK_CUR);
- dest->seq = 0;
- } else {
- /* found */
- dest->seq = shmat(shmid, NULL, 0);
- fread(dest->seq, sizeof(long unsigned), dest->seqsize, file);
- }
- key = ftok(filename, 'v');
- shmid = shmget(key, dest->v_index * sizeof(unsigned), IPC_CREAT | 0666);
- if(shmid < 0) {
- fprintf(stderr, "Could not setup the shared hashMap v\n");
- fseek(file, dest->v_index * sizeof(unsigned), SEEK_CUR);
- dest->values = 0;
- } else {
- /* found */
- dest->values = shmat(shmid, NULL, 0);
- fread(dest->values, sizeof(unsigned), dest->v_index, file);
- }
- key = ftok(filename, 'k');
- shmid = shmget(key, (dest->n + 1) * sizeof(unsigned), IPC_CREAT | 0666);
- if(shmid < 0) {
- fprintf(stderr, "Could not setup the shared hashMap k\n");
- fseek(file, (dest->n + 1) * sizeof(unsigned), SEEK_CUR);
- dest->key_index = 0;
- } else {
- /* found */
- dest->key_index = shmat(shmid, NULL, 0);
- fread(dest->key_index, sizeof(unsigned), dest->n + 1, file);
- }
- key = ftok(filename, 'i');
- shmid = shmget(key, dest->n * sizeof(unsigned), IPC_CREAT | 0666);
- if(shmid < 0) {
- fprintf(stderr, "Could not setup the shared hashMap i\n");
- fseek(file, dest->n * sizeof(unsigned), SEEK_CUR);
- dest->value_index = 0;
- } else {
- /* found */
- dest->value_index = shmat(shmid, NULL, 0);
- fread(dest->value_index, sizeof(unsigned), dest->n, file);
- }
+ size *= sizeof(long unsigned);
+ }
+ key = ftok(filename, 'k');
+ shmid = shmget(key, size, IPC_CREAT | 0666);
+ if(shmid < 0) {
+ fprintf(stderr, "Could not setup the shared hashMap k\n");
+ fseek(file, size, SEEK_CUR);
+ dest->values = 0;
+ status = 1;
+ } else {
+ /* found */
+ dest->key_index = shmat(shmid, NULL, 0);
+ fread(dest->key_index, 1, size, file);
}
+
+ /* value indexes */
+ size = dest->n;
+ if(dest->v_index < U_LIMIT) {
+ size *= sizeof(unsigned);
+ } else {
+ size *= sizeof(long unsigned);
+ }
+ key = ftok(filename, 'i');
+ shmid = shmget(key, size, IPC_CREAT | 0666);
+ if(shmid < 0) {
+ fprintf(stderr, "Could not setup the shared hashMap i\n");
+ fseek(file, size, SEEK_CUR);
+ dest->value_index = 0;
+ status = 1;
+ } else {
+ /* found */
+ dest->value_index = shmat(shmid, NULL, 0);
+ fread(dest->value_index, 1, size, file);
+ }
+
+ return status;
}
void hashMapKMA_destroySHM(struct hashMapKMA *dest, FILE *file, const char *filename) {
- int shmid;
- long unsigned mask;
+ int shmid, kmersize;
+ unsigned DB_size;
+ long unsigned mask, size;
key_t key;
/* load sizes */
- fseek(file, sizeof(int), SEEK_CUR);
+ fread(&DB_size, sizeof(unsigned), 1, file);
fread(&dest->kmersize, sizeof(unsigned), 1, file);
fread(&dest->prefix_len, sizeof(unsigned), 1, file);
fread(&dest->prefix, sizeof(long unsigned), 1, file);
fread(&dest->size, sizeof(long unsigned), 1, file);
- fread(&dest->n, sizeof(unsigned), 1, file);
- fread(&dest->seqsize, sizeof(unsigned), 1, file);
- fread(&dest->v_index, sizeof(unsigned), 1, file);
- fread(&dest->null_index, sizeof(unsigned), 1, file);
+ fread(&dest->n, sizeof(long unsigned), 1, file);
+ fread(&dest->v_index, sizeof(long unsigned), 1, file);
+ fread(&dest->null_index, sizeof(long unsigned), 1, file);
+ kmersize = dest->kmersize;
+ mask = 0;
+ mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
/* check shared memory, and destroy */
+ size = dest->size;
+ if((dest->size - 1) == mask) {
+ if(dest->v_index <= U_LIMIT) {
+ size *= sizeof(unsigned);
+ } else {
+ size *= sizeof(long unsigned);
+ }
+ } else {
+ if(dest->n <= U_LIMIT) {
+ size *= sizeof(unsigned);
+ } else {
+ size *= sizeof(long unsigned);
+ }
+ }
key = ftok(filename, 'e');
- shmid = shmget(key, dest->size * sizeof(unsigned), 0666);
+ shmid = shmget(key, size, 0666);
if(shmid >= 0) {
shmctl(shmid, IPC_RMID, NULL);
}
- mask = 0;
- mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (dest->kmersize << 1));
+ /* values */
+ size = dest->v_index;
+ if(DB_size < HU_LIMIT) {
+ size *= sizeof(short unsigned);
+ } else {
+ size *= sizeof(unsigned);
+ }
+ key = ftok(filename, 'v');
+ shmid = shmget(key, size, 0666);
+ if(shmid >= 0) {
+ shmctl(shmid, IPC_RMID, NULL);
+ }
- if((dest->size - 1) == mask) {
- key = ftok(filename, 'v');
- shmid = shmget(key, dest->v_index * sizeof(unsigned), 0666);
- if(shmid >= 0) {
- shmctl(shmid, IPC_RMID, NULL);
- }
+ /* kmers */
+ size = dest->n + 1;
+ if(dest->kmersize <= 16) {
+ size *= sizeof(unsigned);
} else {
- key = ftok(filename, 's');
- shmid = shmget(key, dest->seqsize * sizeof(long unsigned), 0666);
- if(shmid >= 0) {
- shmctl(shmid, IPC_RMID, NULL);
- }
- key = ftok(filename, 'v');
- shmid = shmget(key, dest->v_index * sizeof(unsigned), 0666);
- if(shmid >= 0) {
- shmctl(shmid, IPC_RMID, NULL);
- }
- key = ftok(filename, 'k');
- shmid = shmget(key, (dest->n + 1) * sizeof(unsigned), 0666);
- if(shmid >= 0) {
- shmctl(shmid, IPC_RMID, NULL);
- }
- key = ftok(filename, 'i');
- shmid = shmget(key, dest->n * sizeof(unsigned), 0666);
- if(shmid >= 0) {
- shmctl(shmid, IPC_RMID, NULL);
- }
+ size *= sizeof(long unsigned);
+ }
+ key = ftok(filename, 'k');
+ shmid = shmget(key, size, 0666);
+ if(shmid >= 0) {
+ shmctl(shmid, IPC_RMID, NULL);
+ }
+
+ /* value indexes */
+ size = dest->n;
+ if(dest->v_index < U_LIMIT) {
+ size *= sizeof(unsigned);
+ } else {
+ size *= sizeof(long unsigned);
+ }
+ key = ftok(filename, 'i');
+ shmid = shmget(key, size, 0666);
+ if(shmid >= 0) {
+ shmctl(shmid, IPC_RMID, NULL);
}
}
@@ -354,7 +403,7 @@ char * name_setupSHM(FILE *file, const char *filename) {
} else {
template_names = shmat(shmid, NULL, 0);
fread(template_names, 1, size, file);
- for(i = 0; i < size; i++) {
+ for(i = 0; i < size; ++i) {
if(template_names[i] == '\n') {
template_names[i] = 0;
}
@@ -395,6 +444,7 @@ void helpMessage(int exeStatus) {
fprintf(helpOut, "#\t-destroy\tDestroy shared DB\t\tFalse\n");
fprintf(helpOut, "#\t-shmLvl\t\tLevel of shared memory\t\t1\n");
fprintf(helpOut, "#\t-shm-h\t\tExplain shm levels\n");
+ fprintf(helpOut, "#\t-v\t\tVersion\n");
fprintf(helpOut, "#\t-h\t\tShows this help message\n");
fprintf(helpOut, "#\n");
exit(exeStatus);
@@ -402,7 +452,7 @@ void helpMessage(int exeStatus) {
int main(int argc, char *argv[]) {
- int args, file_len, destroy, *template_lengths, *index;
+ int args, file_len, destroy, status, *template_lengths, *index;
unsigned shmLvl;
long unsigned *seq;
char *templatefilename, *template_names;
@@ -413,12 +463,13 @@ int main(int argc, char *argv[]) {
templatefilename = 0;
destroy = 0;
shmLvl = 1;
+ status = 0;
/* PARSE COMMAND LINE OPTIONS */
args = 1;
while(args < argc) {
if(strcmp(argv[args], "-t_db") == 0) {
- args++;
+ ++args;
if(args < argc) {
templatefilename = malloc(strlen(argv[args]) + 64);
if(!templatefilename) {
@@ -430,7 +481,7 @@ int main(int argc, char *argv[]) {
} else if(strcmp(argv[args], "-destroy") == 0) {
destroy = 1;
} else if(strcmp(argv[args], "-shmLvl") == 0) {
- args++;
+ ++args;
if(args < argc) {
shmLvl = atoi(argv[args]);
if(!shmLvl) {
@@ -438,6 +489,9 @@ int main(int argc, char *argv[]) {
exit(0);
}
}
+ } else if(strcmp(argv[args], "-v") == 0) {
+ fprintf(stdout, "KMA_SHM-%d.%d.%d\n", version[0], version[1], version[2]);
+ exit(0);
} else if(strcmp(argv[args], "-h") == 0) {
helpMessage(0);
} else if(strcmp(argv[args], "-shm-h") == 0) {
@@ -457,7 +511,7 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "# Printing help message:\n");
helpMessage(-1);
}
- args++;
+ ++args;
}
if(templatefilename == 0) {
fprintf(stderr, "# Too few arguments handed\n");
@@ -480,6 +534,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
hashMapKMA_destroySHM(templates, file, templatefilename);
fclose(file);
@@ -493,6 +548,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
hashMapKMA_destroySHM(templates, file, templatefilename);
fclose(file);
@@ -506,6 +562,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
length_destroySHM(file, templatefilename);
fclose(file);
@@ -520,6 +577,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
seq_destroySHM(file, templatefilename);
fclose(file);
@@ -531,6 +589,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
index_destroySHM(file, templatefilename);
fclose(file);
@@ -544,6 +603,7 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
name_destroySHM(file, templatefilename);
fclose(file);
@@ -557,8 +617,9 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
- hashMapKMA_setupSHM(templates, file, templatefilename);
+ status |= hashMapKMA_setupSHM(templates, file, templatefilename);
hashMap_shm_detach(templates);
fclose(file);
}
@@ -571,8 +632,9 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
- hashMapKMA_setupSHM(templates, file, templatefilename);
+ status |= hashMapKMA_setupSHM(templates, file, templatefilename);
hashMap_shm_detach(templates);
fclose(file);
}
@@ -585,10 +647,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
template_lengths = length_setupSHM(file, templatefilename);
- if(template_lengths)
+ if(template_lengths) {
shmdt(template_lengths);
+ } else {
+ status |= 1;
+ }
fclose(file);
}
templatefilename[file_len] = 0;
@@ -601,10 +667,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
seq = seq_setupSHM(file, templatefilename);
- if(seq)
+ if(seq) {
shmdt(seq);
+ } else {
+ status |= 1;
+ }
fclose(file);
}
templatefilename[file_len] = 0;
@@ -614,10 +684,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
index = index_setupSHM(file, templatefilename);
- if(index)
+ if(index) {
shmdt(index);
+ } else {
+ status |= 1;
+ }
fclose(file);
}
templatefilename[file_len] = 0;
@@ -629,10 +703,14 @@ int main(int argc, char *argv[]) {
file = fopen(templatefilename, "rb");
if(!file) {
fprintf(stderr, "Error: %d (%s)\n", errno, strerror(errno));
+ status |= errno;
} else {
template_names = name_setupSHM(file, templatefilename);
- if(template_names)
+ if(template_names) {
shmdt(template_names);
+ } else {
+ status |= 1;
+ }
fclose(file);
}
templatefilename[file_len] = 0;
@@ -655,5 +733,5 @@ int main(int argc, char *argv[]) {
* ipcs -a
*/
- return 0;
+ return status;
}
=====================================
KMA_index.c
=====================================
The diff for this file was not included because it is too large.
=====================================
KMA_update.c
=====================================
@@ -0,0 +1,357 @@
+/* Philip T.L.C. Clausen Jan 2017 plan at dtu.dk */
+
+/*
+ * Copyright (c) 2017, Philip Clausen, Technical University of Denmark
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define HU_LIMIT 65535
+#define U_LIMIT 4294967295
+
+struct hashMapKMA { // in-memory image of a "<db>.comp.b" index
+ long unsigned size; // number of entries in exist
+ long unsigned n; // k-mers stored; key_index holds n + 1 entries, value_index n
+ long unsigned null_index; // null value
+ long unsigned v_index; // number of entries in values
+ unsigned kmersize; // k
+ unsigned prefix_len; // prefix length
+ long unsigned prefix; // prefix, forced to 1 by the converter for sparse DBs with prefix_len == 0
+ unsigned *exist; // array of `size` entries
+ long unsigned *exist_l; // wide variant of exist, big DBs (not used by the converter below)
+ unsigned *values; // compressed values
+ short unsigned *values_s; // compressed values viewed as 16-bit, used when DB_size < HU_LIMIT (few templates)
+ unsigned *key_index; // Relative
+ long unsigned *key_index_l; // Relative, wide variant holding the k-mers themselves when 16 < k
+ unsigned *value_index; // Relative
+ long unsigned *value_index_l; // Relative, wide variant for big DBs (not used by the converter below)
+};
+
+unsigned DB_size, shifter;
+
+/* BASIC FUNCTIONS */
+/*
+ * Report the current errno on stderr and terminate the program.
+ *
+ * errno is captured first because fprintf() is allowed to modify it, and
+ * the exit status is forced to be non-zero so that a failure reported
+ * while errno happens to be 0 is never mistaken for success.
+ */
+void ERROR() {
+    int err = errno;
+
+    fprintf(stderr, "Error: %d (%s)\n", err, strerror(err));
+    exit(err ? err : 1);
+}
+
+/*
+ * "Safe" malloc: hands back a block of `size` bytes, or reports the
+ * failure through ERROR() when the allocator returns a null pointer.
+ */
+void * smalloc(size_t size) {
+
+    void *block = malloc(size);
+
+    if(block == NULL) {
+        ERROR();
+    }
+
+    return block;
+}
+
+/*
+ * "Safe" fopen: opens `filename` with `mode` and returns the stream.
+ * When the open fails the offending path is printed to stderr and the
+ * failure is handed to ERROR().
+ */
+FILE * sfopen(char *filename, char *mode) {
+
+    FILE *stream = fopen(filename, mode);
+
+    if(stream != NULL) {
+        return stream;
+    }
+
+    /* name the file first, ERROR() only knows about errno */
+    fprintf(stderr, "Filename:\t%s\n", filename);
+    ERROR();
+
+    return stream;
+}
+
+/*
+ * Extract the k-mer that starts at position `cPos` of the packed
+ * sequence `compressor`.
+ *
+ * Positions are addressed as 32 per word with 2 bits each: the low five
+ * bits of cPos select the bit offset inside a word, the remaining bits
+ * select the word. The global `shifter` decides how many low bits are
+ * dropped from the result; when the requested window runs past the end
+ * of the current word the top bits of the following word are merged in.
+ */
+long unsigned getKmer(long unsigned *compressor, unsigned cPos) {
+
+    unsigned bitOffset = (cPos & 31) << 1;
+    unsigned word = cPos >> 5;
+    long unsigned kmer = compressor[word] << bitOffset;
+
+    if(shifter < bitOffset) {
+        /* window straddles two words, pull in the head of the next one */
+        kmer |= compressor[word + 1] >> (64 - bitOffset);
+    }
+
+    return kmer >> shifter;
+}
+
+/*
+ * Rewrite "<filename>.length.b" in place for the 0.15 layout.
+ *
+ * The file starts with an unsigned count `size`, followed by one, two or
+ * three arrays of `size` unsigned values:
+ *   one array    - left untouched, returns 0
+ *   two arrays   - old DB, rewritten as [first, first, second], returns 0
+ *   three arrays - rewritten as [third, first, second], returns 1
+ * Any other payload size, or a missing header, is reported as a
+ * malformed DB and terminates the program with status 1.
+ *
+ * `filename` must have room for the appended suffix; it is restored to
+ * its original content before returning. Short writes and a failing
+ * close are reported through ERROR().
+ */
+unsigned convertLength_014to015(char *filename) {
+
+    unsigned size, file_len, converted;
+    size_t entries, loaded;
+    int *lengths;
+    FILE *file;
+
+    file_len = strlen(filename);
+    strcat(filename, ".length.b");
+    file = sfopen(filename, "rb+");
+    filename[file_len] = 0;
+
+    /* without the leading count nothing below is meaningful */
+    if(fread(&size, sizeof(unsigned), 1, file) != 1) {
+        fprintf(stderr, "DB is malformed.\n");
+        exit(1);
+    }
+
+    /* do the size arithmetic in size_t, 3 * size may not fit in unsigned */
+    entries = size;
+    lengths = smalloc(3 * entries * sizeof(unsigned));
+
+    loaded = fread(lengths, sizeof(unsigned), 3 * entries, file);
+    /* reposition behind the count; also required between reading and writing */
+    fseek(file, sizeof(unsigned), SEEK_SET);
+    if(loaded == entries) {
+        converted = 0;
+    } else if(loaded == 2 * entries) {
+        fprintf(stderr, "DB is old.\n");
+        fprintf(stderr, "It will only work for \"-Sparse\" mapping!!!\n");
+        if(fwrite(lengths, sizeof(unsigned), entries, file) != entries ||
+           fwrite(lengths, sizeof(unsigned), 2 * entries, file) != 2 * entries) {
+            ERROR();
+        }
+        converted = 0;
+    } else if(loaded == 3 * entries) {
+        if(fwrite(lengths + 2 * entries, sizeof(unsigned), entries, file) != entries ||
+           fwrite(lengths, sizeof(unsigned), 2 * entries, file) != 2 * entries) {
+            ERROR();
+        }
+        converted = 1;
+    } else {
+        fprintf(stderr, "DB is malformed.\n");
+        exit(1);
+    }
+
+    free(lengths);
+    /* buffered data reaches the file at close, so a failure here matters */
+    if(fclose(file)) {
+        ERROR();
+    }
+
+    return converted;
+}
+
+/*
+ * Convert "<filename>.comp.b" from the 0.14 to the 0.15 layout in place.
+ *
+ * The old file is loaded completely (header, exist, the packed sequence
+ * when the DB is not a full table, values, key_index, value_index), the
+ * input stream is closed, and the same path is rewritten with
+ *   - n, v_index and null_index widened to long unsigned,
+ *   - the packed sequence dropped,
+ *   - values narrowed to short unsigned when DB_size < HU_LIMIT,
+ *   - key_index replaced by the k-mers themselves when kmersize > 16.
+ * "<filename>.b" is removed first. If `prefix` is set and the DB has
+ * prefix_len == 0, the stored prefix is forced to 1.
+ *
+ * `filename` must have room for the appended suffixes and is restored
+ * before returning. Sets the globals DB_size and shifter.
+ *
+ * Returns 0 on success and 1 when the input is truncated or the output
+ * could not be written completely. A DB that fails the version sanity
+ * check terminates the program with status 1.
+ */
+int hashMapKMA_014to015(char *filename, unsigned prefix) {
+
+    unsigned i, tmp, size, kmersize, seqsize, file_len;
+    int status;
+    long unsigned mask, *seq;
+    FILE *file;
+    struct hashMapKMA *dest;
+
+    /* rm filename.b */
+    file_len = strlen(filename);
+    strcat(filename, ".b");
+    remove(filename);
+    filename[file_len] = 0;
+
+    /* load DB */
+    strcat(filename, ".comp.b");
+    file = sfopen(filename, "rb");
+    filename[file_len] = 0;
+
+    /* start with empty arrays so the shared cleanup can free unconditionally */
+    status = 1;
+    seq = 0;
+    dest = smalloc(sizeof(struct hashMapKMA));
+    dest->exist = 0;
+    dest->values = 0;
+    dest->key_index = 0;
+    dest->value_index = 0;
+
+    /* load sizes, a truncated header is a conversion error */
+    if(fread(&DB_size, sizeof(unsigned), 1, file) != 1 ||
+       fread(&dest->kmersize, sizeof(unsigned), 1, file) != 1 ||
+       fread(&dest->prefix_len, sizeof(unsigned), 1, file) != 1 ||
+       fread(&dest->prefix, sizeof(long unsigned), 1, file) != 1 ||
+       fread(&dest->size, sizeof(long unsigned), 1, file) != 1) {
+        goto cleanup;
+    }
+
+    /* sizeof * sizeof only yields the word width (64) for 8-byte long unsigned */
+    kmersize = dest->kmersize;
+    mask = 0;
+    mask = (~mask) >> (sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1));
+    shifter = sizeof(long unsigned) * sizeof(long unsigned) - (kmersize << 1);
+
+    /* load changed size, these fields are 32 bit wide in the old layout */
+    if(fread(&tmp, sizeof(unsigned), 1, file) != 1) {
+        goto cleanup;
+    }
+    dest->n = tmp;
+    if(fread(&seqsize, sizeof(unsigned), 1, file) != 1) { //seq size
+        goto cleanup;
+    }
+    if(fread(&tmp, sizeof(unsigned), 1, file) != 1) {
+        goto cleanup;
+    }
+    dest->v_index = tmp;
+    if(fread(&tmp, sizeof(unsigned), 1, file) != 1) {
+        goto cleanup;
+    }
+    dest->null_index = tmp;
+
+    /* make checks */
+    if(dest->size < dest->n || dest->n == 0) {
+        fprintf(stderr, "DB is not of version 0.14\n");
+        exit(1);
+    }
+
+    /* load arrays */
+    dest->exist = smalloc(dest->size * sizeof(unsigned));
+    if(dest->size != fread(dest->exist, sizeof(unsigned), dest->size, file)) {
+        goto cleanup;
+    }
+
+    if(mask != (dest->size - 1)) {
+        seq = smalloc(seqsize * sizeof(long unsigned));
+        if(seqsize != fread(seq, sizeof(long unsigned), seqsize, file)) {
+            goto cleanup;
+        }
+    }
+
+    dest->values = smalloc(dest->v_index * sizeof(int));
+    if(dest->v_index != fread(dest->values, sizeof(int), dest->v_index, file)) {
+        goto cleanup;
+    }
+
+    if(mask != (dest->size - 1)) {
+        dest->key_index = smalloc((dest->n + 1) * sizeof(unsigned));
+        if((dest->n + 1) != fread(dest->key_index, sizeof(unsigned), dest->n + 1, file)) {
+            goto cleanup;
+        }
+
+        dest->value_index = smalloc(dest->n * sizeof(unsigned));
+        if(dest->n != fread(dest->value_index, sizeof(unsigned), dest->n, file)) {
+            goto cleanup;
+        }
+    }
+
+    /* everything is in memory, release the input before truncating the same path */
+    fclose(file);
+    file = 0;
+
+    /* convert to new format */
+    /* change prefix if sparse - */
+    if(prefix && dest->prefix_len == 0) {
+        dest->prefix = 1;
+    }
+
+    strcat(filename, ".comp.b");
+    file = sfopen(filename, "wb");
+    filename[file_len] = 0;
+
+    /* from here on status collects short writes */
+    status = 0;
+    status |= (fwrite(&DB_size, sizeof(unsigned), 1, file) != 1);
+    status |= (fwrite(&dest->kmersize, sizeof(unsigned), 1, file) != 1);
+    status |= (fwrite(&dest->prefix_len, sizeof(unsigned), 1, file) != 1);
+    status |= (fwrite(&dest->prefix, sizeof(long unsigned), 1, file) != 1);
+    status |= (fwrite(&dest->size, sizeof(long unsigned), 1, file) != 1);
+    status |= (fwrite(&dest->n, sizeof(long unsigned), 1, file) != 1);
+    status |= (fwrite(&dest->v_index, sizeof(long unsigned), 1, file) != 1);
+    status |= (fwrite(&dest->null_index, sizeof(long unsigned), 1, file) != 1);
+
+    /* exist */
+    status |= (fwrite(dest->exist, sizeof(unsigned), dest->size, file) != dest->size);
+
+    /* values */
+    if(DB_size < HU_LIMIT) {
+        /* narrow in place; walking forward never overwrites an unread entry */
+        dest->values_s = (short unsigned *)(dest->values);
+        for(i = 0; i < dest->v_index; ++i) {
+            dest->values_s[i] = dest->values[i];
+        }
+        size = sizeof(short unsigned);
+    } else {
+        size = sizeof(unsigned);
+    }
+    status |= (fwrite(dest->values, size, dest->v_index, file) != dest->v_index);
+
+    /* a full table has neither k-mers nor value indexes */
+    if(mask != (dest->size - 1)) {
+        /* kmers */
+        if(dest->kmersize <= 16) {
+            status |= (fwrite(dest->key_index, sizeof(unsigned), dest->n + 1, file) != (dest->n + 1));
+        } else {
+            dest->key_index_l = realloc(dest->key_index, (dest->n + 1) * sizeof(long unsigned));
+            if(dest->key_index_l) {
+                dest->key_index = (unsigned *)(dest->key_index_l);
+            } else {
+                ERROR();
+            }
+
+            /* widen in place; walking backward never overwrites an unread entry */
+            i = dest->n + 1;
+            while(i--) {
+                dest->key_index_l[i] = getKmer(seq, dest->key_index[i]);
+            }
+            status |= (fwrite(dest->key_index_l, sizeof(long unsigned), dest->n + 1, file) != (dest->n + 1));
+        }
+
+        /* value_index */
+        status |= (fwrite(dest->value_index, sizeof(unsigned), dest->n, file) != dest->n);
+    }
+
+cleanup:
+    /* buffered output is only known to be on disk once fclose succeeds */
+    if(file && fclose(file)) {
+        status = 1;
+    }
+    free(seq);
+    free(dest->exist);
+    free(dest->values);
+    free(dest->key_index);
+    free(dest->value_index);
+    free(dest);
+
+    return status;
+}
+
+/*
+ * Convert a complete 0.14 index to 0.15: the length file first, then the
+ * main DB, and finally the "<filename>.decon" DB when a
+ * "<filename>.decon.b" file can be opened.
+ *
+ * `filename` must have room for the appended suffixes and is restored
+ * before returning. Returns the sum of the converter results, i.e. 0
+ * when every conversion succeeded.
+ */
+int index_014to015(char *filename) {
+
+    unsigned base_len, sparse, failures;
+    FILE *probe;
+
+    base_len = strlen(filename);
+
+    /* the length conversion tells the DB converter whether to touch the prefix */
+    sparse = convertLength_014to015(filename);
+    failures = hashMapKMA_014to015(filename, sparse);
+
+    /* check for deCon */
+    strcat(filename, ".decon.b");
+    probe = fopen(filename, "rb");
+    if(probe != NULL) {
+        fclose(probe);
+        /* cut the trailing ".b", leaving "filename.decon" */
+        filename[base_len + 6] = 0;
+        failures += hashMapKMA_014to015(filename, sparse);
+    }
+    filename[base_len] = 0;
+
+    return failures;
+}
+
+/*
+ * Print the usage text and exit with `exeStatus`. The text goes to
+ * stdout when the status is 0 and to stderr otherwise.
+ */
+void helpMessage(int exeStatus) {
+    static const char *const usage[] = {
+        "# KMA_update syncronises kma-indexes to the needed version.\n",
+        "# Options are:\t\tDesc:\t\t\t\t\tRequirements:\n",
+        "#\n",
+        "#\t-t_db\t\tTemplate DB\t\t\t\tREQUIRED\n",
+        "#\t-v\t\t[XXYY], from version major version XX\n#\t\t\tto major version YY. Use minor version,\n#\t\t\tif major version is 0.\t\t\tREQUIRED\n",
+        "#\t-h\t\tShows this help message\n",
+        "#\n"
+    };
+    FILE *sink = (exeStatus == 0) ? stdout : stderr;
+    size_t row;
+
+    /* the lines carry no conversion specifications, so they are emitted verbatim */
+    for(row = 0; row < sizeof(usage) / sizeof(usage[0]); ++row) {
+        fputs(usage[row], sink);
+    }
+    exit(exeStatus);
+}
+
+/*
+ * Entry point of kma_update.
+ *
+ * Options:
+ *   -t_db <name>  template DB to convert (required)
+ *   -v <XXYY>     conversion to perform (required), only 1415 is known
+ *   -h            print the help text and exit
+ *
+ * Exit status: 0 on success, 1 when a required option is missing or an
+ * unknown option is given, 2 for an invalid or unsupported version,
+ * 3 when the conversion itself fails.
+ */
+int main(int argc, char *argv[]) {
+
+    int args;
+    unsigned version;
+    char *filename, *error;
+
+    /* set defaults */
+    filename = 0;
+    version = 0;
+
+    /* args is signed like argc so the loop test compares like with like */
+    args = 1;
+    while(args < argc) {
+
+        if(strcmp(argv[args], "-t_db") == 0) {
+            if(++args < argc) {
+                /* a repeated -t_db replaces the earlier name */
+                free(filename);
+                /* the spare bytes hold the suffixes the converters append in place */
+                filename = smalloc(strlen(argv[args]) + 64);
+                strcpy(filename, argv[args]);
+            }
+        } else if(strcmp(argv[args], "-v") == 0) {
+            if(++args < argc) {
+                version = strtoul(argv[args], &error, 10);
+                if(*error != 0) {
+                    fprintf(stderr, " Invalid version specified.\n");
+                    exit(2);
+                }
+            }
+        } else if(strcmp(argv[args], "-h") == 0) {
+            helpMessage(0);
+        } else {
+            fprintf(stderr, " Invalid option:\t%s\n", argv[args]);
+            fprintf(stderr, " Printing help message:\n");
+            helpMessage(1);
+        }
+        ++args;
+    }
+
+    if(!filename || !version) {
+        fprintf(stderr, "Insuffient amount of arguments handed!!!\n");
+        /* nothing was converted, do not report success to the caller */
+        free(filename);
+        return 1;
+    } else if(version == 1415) {
+        if(index_014to015(filename)) {
+            fprintf(stderr, "Conversion error.\n");
+            exit(3);
+        }
+    } else {
+        fprintf(stderr, "Invalid version swifting specified.\n");
+        fprintf(stderr, "Valid conversions:\n");
+        fprintf(stderr, "\t%d\t%.2f -> %.2f\n", 1415, 0.14, 0.15);
+        free(filename);
+        return 2;
+    }
+
+    free(filename);
+    return 0;
+}
=====================================
Makefile
=====================================
@@ -1,5 +1,5 @@
-CFLAGS = -std=c99 -w -O3
-BINS = kma kma_index kma_shm
+CFLAGS = -w -O3
+BINS = kma kma_index kma_shm kma_update
all: $(BINS)
@@ -7,10 +7,13 @@ kma: KMA.c
$(CC) $(CFLAGS) -o $@ $< -lm -lpthread -lz
kma_index: KMA_index.c
- $(CC) $(CFLAGS) -o $@ $< -lm
+ $(CC) $(CFLAGS) -o $@ $< -lm -lz
kma_shm: KMA_SHM.c
$(CC) $(CFLAGS) -o $@ $<
+kma_update: KMA_update.c
+ $(CC) $(CFLAGS) -o $@ $<
+
clean:
$(RM) $(BINS)
=====================================
README.md
=====================================
@@ -17,7 +17,10 @@ not exist. It works for long low quality reads as well, such as those from Nanop
Non-unique matches are resolved using the "ConClave" sorting scheme, and a consensus sequence are outputtet
in addition to other common attributes, based on our users demands.
-If you use KMA for your published research, then please cite the KMA paper.
+If you use KMA for your published research, then please cite:
+Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund,
+"Rapid and precise alignment of raw reads against redundant databases with KMA",
+BMC Bioinformatics, 2018;19:307.
# Usage #
@@ -65,6 +68,7 @@ Some of the most important options:
-Mt1 Match to only one template in the database.
-ID Minimum identity to output template match.
-apm Paired end method, “p”: Reward if pairing the reads, “u”: unite best template matches in each read if possible, “f” force paired reads to pair.
+-1t1 One read to one template, no splicing performed. Well suited for short reads and whole genome mapping.
-bc90 Basecalls should be significantly overrepresented, and have at least 90% agreement.
-bcNano Basecalls optimized for nanopore sequencing.
-mrs minimum alignment score normalized to alignment length.
@@ -136,7 +140,7 @@ Usage and options are available with the "-h" option on all three programs.
If in doubt, please mail any concerns or problems to: *plan at dtu.dk*.
# Citation #
-1. Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund, "Rapid and precise alignment of raw reads against redundant databases with KMA", under review.
+1. Philip T.L.C. Clausen, Frank M. Aarestrup & Ole Lund, "Rapid and precise alignment of raw reads against redundant databases with KMA", BMC Bioinformatics, 2018;19:307.
# License #
Copyright (c) 2017, Philip Clausen, Technical University of Denmark
=====================================
debian/changelog
=====================================
@@ -1,4 +1,4 @@
-kma (0.14.4+git20180611.7e14ef6-1) UNRELEASED; urgency=medium
+kma (1.1.7-1) UNRELEASED; urgency=medium
* Initial release (Closes: #<bug>)
=====================================
debian/control
=====================================
@@ -10,7 +10,7 @@ Vcs-Browser: https://salsa.debian.org/med-team/kma
Vcs-Git: https://salsa.debian.org/med-team/kam.git
Homepage: https://bitbucket.org/genomicepidemiology/kma
-Package: kme
+Package: kma
Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends}
=====================================
debian/kma.1
=====================================
@@ -1,11 +1,10 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6.
-.TH KMA "1" "June 2018" "kma 0.14.4+git20180611.7e14ef6" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.8.
+.TH KMA "1" "February 2019" "kma 1.1.7" "User Commands"
.SH NAME
-kma \- mapps raw reads to a template database, for optimal performance it is designed to use 3 threads
+kma \- mapps raw reads to a template database
.SH DESCRIPTION
-.IP
-KMA\-0.14.4 mapps raw reads to a template database, for optimal performance it is designed to use 3 threads.
-Options are: Desc: Default: Requirements:
+mapps raw reads to a template database.
+.SH OPTIONS
.TP
\fB\-o\fR
Output file None REQUIRED
@@ -28,6 +27,9 @@ Kmersize DB defined
\fB\-e\fR
evalue 0.05
.TP
+\fB\-ConClave\fR
+ConClave version 1
+.TP
\fB\-mem_mode\fR
Use kmers to choose best
template, and save memory False
@@ -35,6 +37,12 @@ template, and save memory False
\fB\-ex_mode\fR
Searh kmers exhaustively False
.TP
+\fB\-ef\fR
+Print additional features False
+.TP
+\fB\-vcf\fR
+Make vcf file, 2 to apply FT False/0
+.TP
\fB\-deCon\fR
Remove contamination False
.TP
@@ -89,12 +97,19 @@ Swap DB to disk 0 (lvl)
\fB\-1t1\fR
Skip HMM False
.TP
+\fB\-ck\fR
+Count kmers instead of
+pseudo alignment False
+.TP
+\fB\-ca\fR
+Make circular alignments False
+.TP
\fB\-boot\fR
Bootstrap sequence False
.TP
\fB\-bc\fR
Base calls should be
-significantly overrepresented. True
+significantly overrepresented. [True]
.TP
\fB\-bc90\fR
Base calls should be both
@@ -110,6 +125,9 @@ Both mrs and p_value thresholds
has to reached to in order to
report a template hit. or
.TP
+\fB\-mq\fR
+Minimum mapping quality 0
+.TP
\fB\-mrs\fR
Minimum alignment score,
normalized to alignment length 0.50
@@ -129,6 +147,9 @@ Penalty for gap extension \fB\-1\fR
\fB\-per\fR
Reward for pairing reads 7
.TP
+\fB\-cge\fR
+Set CGE penalties and rewards False
+.TP
\fB\-t\fR
Number of threads 1
.TP
=====================================
debian/kma_index.1
=====================================
@@ -1,11 +1,10 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6.
-.TH KMA_INDEX "1" "June 2018" "kma_index 0.14.4+git20180611.7e14ef6" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.8.
+.TH KMA_INDEX "1" "February 2019" "kma_index 1.1.7" "User Commands"
.SH NAME
kma_index \- creates the databases needed to run KMA, from a list of fasta files given
.SH DESCRIPTION
-.IP
kma_index creates the databases needed to run KMA, from a list of fasta files given.
-Options are: Desc: Default:
+.SH OPTIONS
.TP
\fB\-i\fR
Input/query file name (STDIN: "\-\-") None
@@ -38,11 +37,14 @@ Kmersize for indexing 16
Minimum length of templates kmersize (16)
.TP
\fB\-CS\fR
-Start Chain size 1 M
+Start Chain size 1 M
.TP
\fB\-ME\fR
Mega DB False
.TP
+\fB\-NI\fR
+Do not dump *.index.b False
+.TP
\fB\-Sparse\fR
Make Sparse DB ('\-' for no prefix) None/False
.TP
@@ -56,6 +58,9 @@ Homology query 1.0
Both homolgy thresholds
has to be reached or
.TP
+\fB\-v\fR
+Version
+.TP
\fB\-h\fR
Shows this help message
.SH AUTHOR
=====================================
debian/kma_shm.1
=====================================
@@ -1,11 +1,10 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6.
-.TH KMA_SHM "1" "June 2018" "kma_shm 0.14.4+git20180611.7e14ef6" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.8.
+.TH KMA_SHM "1" "February 2019" "kma_shm 1.1.7" "User Commands"
.SH NAME
kma_shm \- sets up a shared database (sysV) for mapping with KMA
.SH DESCRIPTION
-.IP
kma_shm sets up a shared database (sysV) for mapping with KMA.
-Options are: Desc: Default: Requirements:
+.SH OPTIONS
.TP
\fB\-t_db\fR
Template DB None REQUIRED
@@ -19,6 +18,9 @@ Level of shared memory 1
\fB\-shm\-h\fR
Explain shm levels
.TP
+\fB\-v\fR
+Version
+.TP
\fB\-h\fR
Shows this help message
.SH AUTHOR
=====================================
debian/patches/hardening.patch
=====================================
@@ -4,24 +4,28 @@ Description: Propagate hardening options
--- a/Makefile
+++ b/Makefile
-@@ -1,16 +1,16 @@
--CFLAGS = -std=c99 -w -O3
-+CFLAGS += -std=c99 -w -O3
- BINS = kma kma_index kma_shm
+@@ -1,19 +1,19 @@
+-CFLAGS = -w -O3
++CFLAGS += -w -O3
+ BINS = kma kma_index kma_shm kma_update
all: $(BINS)
kma: KMA.c
- $(CC) $(CFLAGS) -o $@ $< -lm -lpthread -lz
-+ $(CC) $(CFLAGS) -g -o $@ $< -lm -lpthread -lz $(LDFLAGS)
++ $(CC) $(CFLAGS) -o $@ $< -lm -lpthread -lz $(LDFLAGS)
kma_index: KMA_index.c
-- $(CC) $(CFLAGS) -o $@ $< -lm
-+ $(CC) $(CFLAGS) -g -o $@ $< -lm $(LDFLAGS)
+- $(CC) $(CFLAGS) -o $@ $< -lm -lz
++ $(CC) $(CFLAGS) -o $@ $< -lm -lz $(LDFLAGS)
kma_shm: KMA_SHM.c
- $(CC) $(CFLAGS) -o $@ $<
-+ $(CC) $(CFLAGS) -g -o $@ $< $(LDFLAGS)
++ $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+ kma_update: KMA_update.c
+- $(CC) $(CFLAGS) -o $@ $<
++ $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
clean:
$(RM) $(BINS)
=====================================
debian/watch
=====================================
@@ -1,4 +1,6 @@
version=4
-opts="mode=git,pretty=0.14.4+git%cd.%h" \
- https://bitbucket.org/genomicepidemiology/kma.git HEAD
+https://bitbucket.org/genomicepidemiology/kma/downloads/?tab=tags .*/@ANY_VERSION@@ARCHIVE_EXT@
+
+#opts="mode=git,pretty=1.1.7+git%cd.%h" \
+# https://bitbucket.org/genomicepidemiology/kma.git HEAD
View it on GitLab: https://salsa.debian.org/med-team/kma/compare/b58f9042625f288adc2415a3a9454c748bc258d4...e8f488650767b6e6391d52937bbc782dc4ac4e13
--
View it on GitLab: https://salsa.debian.org/med-team/kma/compare/b58f9042625f288adc2415a3a9454c748bc258d4...e8f488650767b6e6391d52937bbc782dc4ac4e13
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190219/819a99ac/attachment-0001.html>
More information about the debian-med-commit mailing list