[med-svn] [Git][med-team/genometester][master] 8 commits: Version=5
Andreas Tille (@tille)
gitlab at salsa.debian.org
Thu Dec 11 20:32:55 GMT 2025
Andreas Tille pushed to branch master at Debian Med / genometester
Commits:
d69ace17 by Andreas Tille at 2025-12-11T14:48:22+01:00
Version=5
- - - - -
f71423a6 by Andreas Tille at 2025-12-11T14:49:37+01:00
New upstream version
- - - - -
aa4cc7b1 by Andreas Tille at 2025-12-11T14:49:38+01:00
New upstream version 4.0+git20221122.71e6625
- - - - -
ee7f38b2 by Andreas Tille at 2025-12-11T14:49:38+01:00
Update upstream source from tag 'upstream/4.0+git20221122.71e6625'
Update to upstream version '4.0+git20221122.71e6625'
with Debian dir cd315fe16a0fbff35585b472153baf4b607cb26b
- - - - -
78ed819e by Andreas Tille at 2025-12-11T14:49:39+01:00
Standards-Version: 4.7.2 (routine-update)
- - - - -
4c1ab384 by Andreas Tille at 2025-12-11T14:49:42+01:00
Remove duplicate line from changelog.
Changes-By: lintian-brush
- - - - -
40feacd6 by Andreas Tille at 2025-12-11T21:26:23+01:00
Refresh patches
- - - - -
4238f5cc by Andreas Tille at 2025-12-11T21:32:22+01:00
Fix C vs C++ confusion by rather calling CC for C code Closes: #1107684
- - - - -
22 changed files:
- debian/changelog
- debian/control
- debian/patches/add_debug_symbols.patch
- − debian/patches/avoid_redefinition.patch
- + debian/patches/cross.patch
- debian/patches/hardening.patch
- debian/patches/series
- debian/watch
- src/Makefile
- src/database.c
- src/fasta.c
- src/gassembler.c
- src/glistcompare.c
- src/glistmaker.c
- src/glistquery.c
- src/gmer_counter.c
- src/set-operations.c
- src/set-operations.h
- src/utils.c
- src/version.h
- src/word-map.c
- src/word-map.h
Changes:
=====================================
debian/changelog
=====================================
@@ -1,13 +1,21 @@
-genometester (4.0+git20211112.9030deb-1) UNRELEASED; urgency=medium
+genometester (4.0+git20221122.71e6625-1) UNRELEASED; urgency=medium
- * Fix watchfile to detect new versions on github
+ [ Andreas Tille ]
* New upstream version
+ * d/watch:
+ - detect new versions on github
+ - version=5
* Standards-Version: 4.6.0 (routine-update)
* debhelper-compat 13 (routine-update)
* Add missing build dependency on dh addon.
* Build-Depends: zlib1g-dev
+ * Standards-Version: 4.7.2 (routine-update)
- -- Andreas Tille <tille at debian.org> Sun, 16 Jan 2022 17:13:40 +0100
+ [ Helmut Grohne ]
+ * Fix C vs C++ confusion by rather calling CC for C code
+ Closes: #1107684
+
+ -- Andreas Tille <tille at debian.org> Thu, 11 Dec 2025 14:49:37 +0100
genometester (4.0+git20200511.91cecb5+dfsg-1) unstable; urgency=medium
=====================================
debian/control
=====================================
@@ -7,7 +7,7 @@ Priority: optional
Build-Depends: debhelper-compat (= 13),
debhelper,
zlib1g-dev
-Standards-Version: 4.6.0
+Standards-Version: 4.7.2
Vcs-Browser: https://salsa.debian.org/med-team/genometester
Vcs-Git: https://salsa.debian.org/med-team/genometester.git
Homepage: https://github.com/bioinfo-ut/GenomeTester4
=====================================
debian/patches/add_debug_symbols.patch
=====================================
@@ -4,7 +4,7 @@ Description: Add debug symbols
--- a/src/Makefile
+++ b/src/Makefile
-@@ -164,7 +164,7 @@ AZ_SOURCES = \
+@@ -165,7 +165,7 @@ AZ_SOURCES = \
az/serialization.c az/serialization.h \
az/types.c az/types.h
=====================================
debian/patches/avoid_redefinition.patch deleted
=====================================
@@ -1,31 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
- Aaron M. Ucko <ucko at debian.org>
-Last-Update: Sun, 16 Jan 2022 21:32:08 -0500
-Origin: https://lists.debian.org/debian-med/2022/01/msg00030.html
-Description: Avoid "previous definition of ..."
-
---- a/src/word-map.h
-+++ b/src/word-map.h
-@@ -86,19 +86,19 @@ struct _GT4WordMapClass {
-
- unsigned int gt4_word_map_get_type (void);
-
--inline extern uint64_t
-+static inline uint64_t
- gt4_word_map_get_word (const GT4WordMap *wmap, uint64_t idx)
- {
- return *((uint64_t *) (wmap->wordlist + 12 * idx));
- }
-
--inline extern uint32_t
-+static inline uint32_t
- gt4_word_map_get_count (const GT4WordMap *wmap, uint64_t idx)
- {
- return *((uint32_t *) (wmap->wordlist + 12 * idx + 8));
- }
-
--inline extern uint64_t *
-+static inline uint64_t *
- gt4_word_map_get_word_ptr (const GT4WordMap *wmap, uint64_t idx)
- {
- return (uint64_t *) (wmap->wordlist + 12 * idx);
=====================================
debian/patches/cross.patch
=====================================
@@ -0,0 +1,101 @@
+Author: Helmut Grohne <helmut at subdivi.de>
+Last-Update: 2025-12-11
+Bug-Debian: https://bugs.debian.org/1107684
+Description: Fix C vs C++ confusion by rather calling CC for C code
+
+--- a/src/Makefile
++++ b/src/Makefile
+@@ -1,7 +1,7 @@
+ # Project: GenomeTester v4.0
+
+ VERSION = 4.0
+-CXX = gcc
++CC = gcc
+
+ # C Files
+
+@@ -172,39 +172,39 @@ LIBS = -lm -lpthread -lrt -lz
+ INCS = -I.
+ BINS = glistmaker glistquery glistcompare
+
+-#CXXFLAGS = $(INCS) $(DEBUGFLAGS) -Wall
+-CXXFLAGS += $(INCS) $(RELEASEFLAGS) -Wall -std=c99 -D_DEFAULT_SOURCE -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L -D_XOPEN_SOURCE
++#CFLAGS = $(INCS) $(DEBUGFLAGS) -Wall
++CFLAGS += $(INCS) $(RELEASEFLAGS) -Wall -std=c99 -D_DEFAULT_SOURCE -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L -D_XOPEN_SOURCE
+
+ .PHONY: all all-before all-after clean clean-custom
+
+ all: all-before $(BINS) all-after
+
+ glistmaker: $(LISTMAKER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES)
+- $(CXX) $(LISTMAKER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o glistmaker $(LIBS) $(CXXFLAGS) $(AZ_FLAGS)
++ $(CC) $(LISTMAKER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o glistmaker $(LIBS) $(CFLAGS) $(AZ_FLAGS)
+
+ glistquery: $(LISTQUERY_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES)
+- $(CXX) $(LISTQUERY_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o glistquery $(LIBS) $(CXXFLAGS) $(AZ_FLAGS)
++ $(CC) $(LISTQUERY_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o glistquery $(LIBS) $(CFLAGS) $(AZ_FLAGS)
+
+ glistcompare: $(LISTCOMPARE_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES)
+- $(CXX) $(LISTCOMPARE_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o glistcompare $(LIBS) $(CXXFLAGS) $(AZ_FLAGS)
++ $(CC) $(LISTCOMPARE_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o glistcompare $(LIBS) $(CFLAGS) $(AZ_FLAGS)
+
+ gindexer: $(GINDEXER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES)
+- $(CXX) $(GINDEXER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o gindexer $(LIBS) $(CXXFLAGS) $(AZ_FLAGS)
++ $(CC) $(GINDEXER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o gindexer $(LIBS) $(CFLAGS) $(AZ_FLAGS)
+
+ gmer_counter: $(GMER_COUNTER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES)
+- $(CXX) $(GMER_COUNTER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o gmer_counter $(LIBS) $(CXXFLAGS) $(AZ_FLAGS)
++ $(CC) $(GMER_COUNTER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o gmer_counter $(LIBS) $(CFLAGS) $(AZ_FLAGS)
+
+ gmer_caller: $(GMER_CALLER_SOURCES)
+- $(CXX) $(GMER_CALLER_SOURCES) -o gmer_caller $(LIBS) $(CXXFLAGS) -Wall
++ $(CC) $(GMER_CALLER_SOURCES) -o gmer_caller $(LIBS) $(CFLAGS) -Wall
+
+ gassembler: $(GASSEMBLER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES)
+- $(CXX) $(GASSEMBLER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o gassembler $(LIBS) $(CXXFLAGS) $(AZ_FLAGS)
++ $(CC) $(GASSEMBLER_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o gassembler $(LIBS) $(CFLAGS) $(AZ_FLAGS)
+
+ aleq: $(ALEQ_SOURCES)
+- $(CXX) $(ALEQ_SOURCES) -o aleq $(LIBS) $(CXXFLAGS) -Wall
++ $(CC) $(ALEQ_SOURCES) -o aleq $(LIBS) $(CFLAGS) -Wall
+
+ kmer_predictor: $(KMER_PREDICTOR_SOURCES)
+- $(CXX) $(KMER_PREDICTOR_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o kmer_predictor $(LIBS) $(AZ_FLAGS) $(CXXFLAGS)
++ $(CC) $(KMER_PREDICTOR_SOURCES) $(AZ_SOURCES) $(ARIKKEI_SOURCES) -o kmer_predictor $(LIBS) $(AZ_FLAGS) $(CFLAGS)
+
+ clean: clean-custom
+ rm -f *.o $(BINS)
+--- a/src/Makefile.gmer
++++ b/src/Makefile.gmer
+@@ -1,7 +1,7 @@
+ # Project: GMER Caller
+
+ VERSION = 1.0
+-CXX = gcc
++CC = gcc
+
+ # C Files
+
+@@ -37,18 +37,18 @@ LIBS = -lm -lpthread -lrt
+ INCS = -I.
+ BINS = gmer_counter gmer_caller
+
+-#CXXFLAGS = $(INCS) $(DEBUGFLAGS) -Wall
+-CXXFLAGS = $(INCS) $(RELEASEFLAGS) -Wall
++#CFLAGS = $(INCS) $(DEBUGFLAGS) -Wall
++CFLAGS = $(INCS) $(RELEASEFLAGS) -Wall
+
+ .PHONY: all all-before all-after clean clean-custom
+
+ all: all-before $(BINS) all-after
+
+ gmer_counter: $(GMERCOUNTER_SOURCES)
+- $(CXX) $(GMERCOUNTER_SOURCES) -o gmer_counter $(LIBS) $(CXXFLAGS) -Wall
++ $(CC) $(GMERCOUNTER_SOURCES) -o gmer_counter $(LIBS) $(CFLAGS) -Wall
+
+ gmer_caller: $(GMER_CALLER_SOURCES)
+- $(CXX) $(GMER_CALLER_SOURCES) -o gmer_caller $(LIBS) $(CXXFLAGS) -Wall
++ $(CC) $(GMER_CALLER_SOURCES) -o gmer_caller $(LIBS) $(CFLAGS) -Wall
+
+ dist: $(GMERCOUNTER_SOURCES) $(GMER_CALLER_SOURCES)
+ mkdir fastgt_$(VERSION);
=====================================
debian/patches/hardening.patch
=====================================
@@ -4,12 +4,12 @@ Description: Propagate hardening options
--- a/src/Makefile
+++ b/src/Makefile
-@@ -172,7 +172,7 @@ INCS = -I.
+@@ -173,7 +173,7 @@ INCS = -I.
BINS = glistmaker glistquery glistcompare
#CXXFLAGS = $(INCS) $(DEBUGFLAGS) -Wall
--CXXFLAGS = $(INCS) $(RELEASEFLAGS) -Wall
-+CXXFLAGS += $(INCS) $(RELEASEFLAGS) -Wall $(LDFLAGS)
+-CXXFLAGS = $(INCS) $(RELEASEFLAGS) -Wall -std=c99 -D_DEFAULT_SOURCE -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L -D_XOPEN_SOURCE
++CXXFLAGS += $(INCS) $(RELEASEFLAGS) -Wall -std=c99 -D_DEFAULT_SOURCE -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L -D_XOPEN_SOURCE
.PHONY: all all-before all-after clean clean-custom
=====================================
debian/patches/series
=====================================
@@ -1,3 +1,3 @@
add_debug_symbols.patch
hardening.patch
-avoid_redefinition.patch
+cross.patch
=====================================
debian/watch
=====================================
@@ -1,6 +1,6 @@
-version=4
+Version: 5
-opts="mode=git,pretty=4.0+git%cd.%h,repacksuffix=+dfsg,dversionmangle=auto,uversionmangle=s/_/./g,repack,compression=xz" \
- https://github.com/bioinfo-ut/GenomeTester4.git HEAD
-
-# https://github.com/bioinfo-ut/GenomeTester4/releases .*/Version_(\d[_.\d]+)@ARCHIVE_EXT@
+Source: https://github.com/bioinfo-ut/GenomeTester4.git
+Matching-Pattern: HEAD
+Mode: git
+Git-Pretty: 4.0+git%cd.%h
=====================================
src/Makefile
=====================================
@@ -46,6 +46,7 @@ LISTQUERY_SOURCES = \
sequence-source.c sequence-source.h \
sequence-stream.c sequence-stream.h \
sequence-zstream.c sequence-zstream.h \
+ set-operations.c set-operations.h \
common.c common.h \
queue.c queue.h \
utils.c utils.h \
@@ -172,7 +173,7 @@ INCS = -I.
BINS = glistmaker glistquery glistcompare
#CXXFLAGS = $(INCS) $(DEBUGFLAGS) -Wall
-CXXFLAGS = $(INCS) $(RELEASEFLAGS) -Wall
+CXXFLAGS = $(INCS) $(RELEASEFLAGS) -Wall -std=c99 -D_DEFAULT_SOURCE -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L -D_XOPEN_SOURCE
.PHONY: all all-before all-after clean clean-custom
=====================================
src/database.c
=====================================
@@ -42,6 +42,12 @@ count_lines_from_text (const unsigned char *cdata, size_t csize, unsigned int *w
unsigned int lengths[4];
unsigned int ntokenz;
+ /* Skip comments */
+ if (cdata[cpos] == '#') {
+ while ((cpos < csize) && (cdata[cpos] != '\n')) cpos += 1;
+ if (cpos < csize) cpos += 1;
+ continue;
+ }
ntokenz = split_line (cdata + cpos, csize - cpos, tokenz, lengths, 3);
if (ntokenz < 2) {
fprintf (stderr, "Line %u has <2 (%u) tokens\n", n_lines, ntokenz);
@@ -115,7 +121,7 @@ gt4_gmer_db_new_from_text (const unsigned char *cdata, unsigned long long csize,
node_bits = get_bits (nlines + 1);
kmer_bits = get_bits (max_kmers);
if ((node_bits + kmer_bits) > 31) {
- fprintf (stderr, "Too many nodes and kmers (%u (%u bits), %u (%u bits)\n", nlines + 1, max_kmers, node_bits, kmer_bits);
+ fprintf (stderr, "Too many nodes and kmers (%u (%u bits), %u (%u bits)\n", nlines + 1, node_bits, max_kmers, kmer_bits);
return 0;
}
/* Set up DB */
@@ -162,6 +168,12 @@ gt4_gmer_db_new_from_text (const unsigned char *cdata, unsigned long long csize,
unsigned int lengths[65536];
unsigned int ntokenz, n_kmers;
unsigned int i;
+ /* Skip comments */
+ if (cdata[cpos] == '#') {
+ while ((cpos < csize) && (cdata[cpos] != '\n')) cpos += 1;
+ if (cpos < csize) cpos += 1;
+ continue;
+ }
/* Initialize */
memset (db->nodes + idx, 0, sizeof (Node));
/* Parse ID + number of kmers */
@@ -213,7 +225,7 @@ gt4_gmer_db_new_from_text (const unsigned char *cdata, unsigned long long csize,
kmer2 = code2 & ((1 << db->kmer_bits) - 1);
fprintf (stderr, "KMer already present (current node %u (%s) kmer %u/%u (%s) code %u) previous %u (%s) kmer %u/%u code %u\n",
idx, db->names + db->nodes[idx].name, i, (dir != 0), word_to_string (word, db->wordsize), code,
- idx2, db->names + db->nodes[idx2].name, kmer2, ((code2 & 0x7fffffff) != 0), code2);
+ idx2, db->names + db->nodes[idx2].name, kmer2, ((code2 & 0x80000000) != 0), code2);
break;
}
}
=====================================
src/fasta.c
=====================================
@@ -252,13 +252,15 @@ fasta_reader_read_nwords (GT4FastaReader *reader, unsigned long long maxwords,
nwords += 1;
}
/* We increase nucleotide position for N too */
- if (cval > ' ') {
- reader->seq_npos += 1;
- }
+ //if (cval > ' ') {
+ reader->seq_npos += 1;
+ //}
} else if (cval >= ' ') {
reader->wordfw = 0;
reader->wordrv = 0;
reader->currentlength = 0;
+ /* We increase nucleotide position for N too */
+ reader->seq_npos += 1;
}
}
reader->cpos += 1;
=====================================
src/gassembler.c
=====================================
@@ -698,6 +698,9 @@ print_usage (FILE *ofs, unsigned int advanced, int exit_value)
static unsigned int only_chr = CHR_1;
static unsigned int only_pos = 0;
+GT4Scout db_scout;
+GT4Scout seq_scout;
+
int
main (int argc, const char *argv[])
{
@@ -1076,8 +1079,11 @@ main (int argc, const char *argv[])
assemble_recursive (db, files, ref_chr, ref_start, ref_end, ref, kmers, nkmers);
}
- if (prefetch_db || prefetch_seq) {
- delete_scouts ();
+ if (prefetch_db) {
+ gt4_delete_scout (&db_scout);
+ }
+ if (prefetch_seq) {
+ //gt4_delete_scout (&seq_scout);
}
return 0;
@@ -1098,6 +1104,9 @@ assemble_recursive (GT4GmerDB *db, SeqFile *files, unsigned int ref_chr, unsigne
strncpy (dup, ref, len);
adata->ref = dup;
adata->cblock = (CallBlock *) malloc (sizeof (CallBlock));
+ memset (adata->cblock, 0, sizeof (CallBlock));
+ adata->cblock->calls = (Call *) malloc (MAX_REFERENCE_LENGTH * 2 * sizeof (Call));
+ memset (adata->cblock->calls, 0, MAX_REFERENCE_LENGTH * 2 * sizeof (Call));
adata->cblock->chr = adata->chr;
adata->cblock->start = adata->start;
adata->cblock->end = adata->end;
@@ -2471,8 +2480,9 @@ load_db_or_die (const char *db_name, const char *seq_dir, const char *id)
exit (1);
}
if (prefetch_db) {
- scout_mmap (cdata, csize);
- sleep (10);
+ db_scout.cdata = cdata;
+ db_scout.csize = csize;
+ gt4_scout_mmap (&db_scout);
}
db = gt4_gmer_db_new_from_binary (cdata, csize);
if (!db) {
@@ -2533,7 +2543,7 @@ map_sequences (GT4GmerDB *db, const char *seq_dir)
return NULL;
}
if (prefetch_seq) {
- scout_mmap (files[i].cdata, files[i].csize);
+ //scout_mmap (files[i].cdata, files[i].csize);
}
}
}
=====================================
src/glistcompare.c
=====================================
@@ -341,8 +341,13 @@ int main (int argc, const char *argv[])
exit (1);
}
- if (!find_intrsec && (rule == RULE_SUBTRACT || rule == RULE_MIN || rule == RULE_FIRST || rule == RULE_SECOND)) {
- fprintf (stderr, "Error: Rules min, subtract, fist and second can only be used with finding the intersection.\n");
+ if (!find_intrsec && (rule == RULE_MIN || rule == RULE_FIRST || rule == RULE_SECOND)) {
+ fprintf (stderr, "Error: Rules min, fist and second can only be used with finding the intersection.\n");
+ exit (1);
+ }
+
+ if ((!find_intrsec && !find_diff) && (rule == RULE_SUBTRACT)) {
+ fprintf (stderr, "Error: Rule subtract can only be used with intersection and difference.\n");
exit (1);
}
=====================================
src/glistmaker.c
=====================================
@@ -278,6 +278,15 @@ main (int argc, const char *argv[])
exit (1);
}
+ if (debug && create_index) {
+ for (i = 0; i < mq.n_sources; i++) {
+ fprintf (stderr, "%u: %s start %llu subseqs %u\n", i, i_files[mq.sources[i].file_idx].name, mq.sources[i].start, mq.sources[i].n_subseqs);
+ //for (j = 0; j < mq.sources[i].n_subseqs; j++) {
+ // fprintf (stderr, "%llu %u\n", i_files[i].subseqs[j]->name_pos, i_files[i].subseqs[j]->name_len);
+ //}
+ }
+ }
+
/* Do work */
process (&mq.queue, 0, &mq);
gt4_queue_lock (&mq.queue);
@@ -626,6 +635,27 @@ write_index (FILE *ofs, const char *loc_files[], unsigned int n_loc_files, GT4Li
unsigned int version;
unsigned long long n_words_loc, n_words, n_locations_loc, n_locs, file_block_loc, file_block_pos, kmer_list_loc, kmer_list_pos, locations_loc, locations_pos;
unsigned char zero[16] = { 0 };
+
+ if (debug) {
+ for (i = 0; i < mq->n_sources; i++) {
+ unsigned int j;
+ GT4LMQSource *src = &mq->sources[i];
+ for (j = 0; j < src->n_subseqs; j++) {
+ GT4SubSequence *ss = &src->subseqs[j];
+ fprintf (stderr, "Src %u subseq %u np %llu nl %u ss %u sl %u\n", i, j, ss->name_pos, ss->name_len, ss->seq_pos, ss->seq_len);
+ unsigned long long np = src->start + ss->name_pos;
+ IFile *ifile = &i_files[src->file_idx];
+ FILE *f = fopen(ifile->name, "r");
+ fseek (f, np, SEEK_SET);
+ char b[256];
+ fread (b, ss->name_len, 1, f);
+ b[ss->name_len] = 0;
+ fclose (f);
+ fprintf (stderr, "%s\n", b);
+ }
+ }
+ }
+
/* Determine file data */
for (i = 0; i < n_i_files; i++) {
GT4LMQSource *sources[1024];
@@ -652,10 +682,34 @@ write_index (FILE *ofs, const char *loc_files[], unsigned int n_loc_files, GT4Li
for (j = 0; j < n_sources; j++) {
unsigned int k;
for (k = 0; k < sources[j]->n_subseqs; k++) {
- i_files[i].subseqs[sources[j]->first_subseq + k] = &sources[j]->subseqs[k];
+ GT4SubSequence *ss = &sources[j]->subseqs[k];
+ i_files[i].subseqs[sources[j]->first_subseq + k] = ss;
+ // Adjust positions
+ ss->name_pos += sources[j]->start;
+ //fprintf (stderr, "%u %u start %llu np %llu\n", j, k, sources[j]->start, ss->name_pos);
+ }
+ }
+ }
+
+ if (debug) {
+ for (i = 0; i < n_i_files; i++) {
+ unsigned int j;
+ IFile *ifile = &i_files[i];
+ FILE *f = fopen(ifile->name, "r");
+ for (j = 0; j < ifile->n_subseqs; j++) {
+ GT4SubSequence *ss = ifile->subseqs[j];
+ fprintf (stderr, "Src %u subseq %u np %llu nl %u ss %u sl %u\n", i, j, ss->name_pos, ss->name_len, ss->seq_pos, ss->seq_len);
+ unsigned long long np = ss->name_pos;
+ fseek (f, np, SEEK_SET);
+ char b[256];
+ fread (b, ss->name_len, 1, f);
+ b[ss->name_len] = 0;
+ fprintf (stderr, "%s\n", b);
}
+ fclose (f);
}
}
+
/* Determine bitsizes */
for (i = 0; i < mq->n_sources; i++) {
unsigned int last_subseq = mq->sources[i].first_subseq + mq->sources[i].n_subseqs - 1;
=====================================
src/glistquery.c
=====================================
@@ -33,6 +33,7 @@
#include "utils.h"
#include "sequence.h"
#include "sequence-stream.h"
+#include "set-operations.h"
#include "fasta.h"
#include "version.h"
#include "index-map.h"
@@ -70,6 +71,39 @@ enum {
FILES,
SEQUENCES
};
+
+typedef struct _DumpData DumpData; /* Context handed to dump_callback through its data pointer */
+
+struct _DumpData {
+ unsigned int n_lists; /* Number of counts[] entries dump_callback prints per row */
+ unsigned int wlen; /* Second argument given to word_to_string */
+};
+
+/*
+ * Row printer used with gt4_union / gt4_is_union.
+ * Writes the textual form of word, then one tab-prefixed count for each of
+ * the n_lists entries in counts, then a newline, all to stdout.
+ * data must point to a DumpData. Always returns 0.
+ */
+static unsigned int
+dump_callback (uint64_t word, uint32_t *counts, void *data)
+{
+	const DumpData *ctx = (const DumpData *) data;
+	unsigned int col = 0;
+
+	printf ("%s", word_to_string (word, ctx->wlen));
+	while (col < ctx->n_lists) {
+		printf ("\t%u", counts[col]);
+		col += 1;
+	}
+	printf ("\n");
+	return 0;
+}
+
+/*
+ * Prints one row per word for the given objects using dump_callback.
+ * A non-zero is_union selects gt4_is_union as the iterator, zero selects gt4_union.
+ * The iterator's return value is ignored.
+ */
+static void
+dump_lists (AZObject *objs[], unsigned int n_objs, unsigned int wlen, unsigned int is_union)
+{
+	DumpData ctx = { n_objs, wlen };
+	unsigned int (*walk) (AZObject *[], unsigned int, unsigned int (*) (uint64_t, uint32_t *, void *), void *);
+
+	walk = (is_union) ? gt4_is_union : gt4_union;
+	walk (objs, n_objs, dump_callback, &ctx);
+}
int main (int argc, const char *argv[])
{
@@ -80,11 +114,12 @@ int main (int argc, const char *argv[])
unsigned int nmm = 0;
unsigned int pm3 = 0;
char *end;
- int printall = 0;
+ int printall = 0, print_header = 0;
unsigned int minfreq = 0, maxfreq = UINT_MAX;
unsigned int distro = 0;
unsigned int bloom = 0;
unsigned int command = QUERY;
+ unsigned int is_union = 0;
for (argidx = 1; argidx < argc; argidx++) {
if (!strcmp (argv[argidx], "-v") || !strcmp (argv[argidx], "--version")) {
@@ -199,8 +234,12 @@ int main (int argc, const char *argv[])
use_3p = 1;
} else if (!strcmp(argv[argidx], "--5p")) {
use_5p = 1;
+ } else if (!strcmp(argv[argidx], "--header")) {
+ print_header = 1;
} else if (!strcmp(argv[argidx], "--bloom")) {
bloom = 1;
+ } else if (!strcmp(argv[argidx], "--is_union")) {
+ is_union = 1;
} else if (!strcmp(argv[argidx], "--disable_scouts")) {
use_scouts = 0;
} else if (argv[argidx][0] != '-') {
@@ -337,8 +376,19 @@ int main (int argc, const char *argv[])
/* If no options is given print all lists/indices */
if (!seqfilename && !querylistfilename && !queryfilename && !querystring) {
- for (i = 0; i < n_lists; i++) {
- print_full_map (maps[i], locations);
+ if (n_lists > 1) {
+ if (print_header) {
+ fprintf (stdout, "KMER");
+ for (i = 0; i < n_lists; i++) {
+ fprintf (stdout, "\t%s", lists[i]);
+ }
+ fprintf (stdout, "\n");
+ }
+ dump_lists (maps, n_lists, wlen, is_union);
+ } else {
+ for (i = 0; i < n_lists; i++) {
+ print_full_map (maps[i], locations);
+ }
}
exit (0);
}
@@ -463,6 +513,8 @@ typedef struct _QueryData QueryData;
struct _QueryData {
GT4WordDictImplementation *dict_impl;
GT4WordDictInstance *dict_inst;
+ GT4WordSListImplementation *slist_impl;
+ GT4WordSListInstance *slist_inst;
GT4WordIndexImplementation *index_impl;
GT4WordIndexInstance *index_inst;
unsigned int n_mm;
@@ -647,6 +699,23 @@ search_fasta (AZObject *obj, const char *fname, unsigned int n_mm, unsigned int
return result;
}
+/*
+ * Walks the query list (l_impl/l_inst) and the list held in qd
+ * (qd->slist_impl/qd->slist_inst) side by side and calls cb_print for every
+ * query word that is also present in qd's list.
+ * Assumes both iterators deliver words in ascending order - TODO confirm
+ * against the word slist interface.
+ * NOTE(review): cb_print is given the query list's count (l_inst->count);
+ * confirm whether the count stored in qd's list was intended instead.
+ * Always returns 0.
+ */
+static unsigned int
+search_list_zipper (GT4WordSListImplementation *l_impl, GT4WordSListInstance *l_inst, QueryData *qd)
+{
+	GT4WordSListImplementation *d_impl = qd->slist_impl;
+	GT4WordSListInstance *d_inst = qd->slist_inst;
+
+	gt4_word_slist_get_first_word (l_impl, l_inst);
+	gt4_word_slist_get_first_word (d_impl, d_inst);
+	while ((d_inst->idx < d_inst->num_words) && (l_inst->idx < l_inst->num_words)) {
+		/* Catch up with the query word; the idx bound keeps this from spinning once qd's list runs out */
+		while ((d_inst->idx < d_inst->num_words) && (d_inst->word < l_inst->word)) {
+			if (!gt4_word_slist_get_next_word (d_impl, d_inst)) break;
+		}
+		/* Nothing left to match against, so no further query word can be found */
+		if (d_inst->idx >= d_inst->num_words) break;
+		if (d_inst->word == l_inst->word) {
+			cb_print (l_inst->word, l_inst->count, qd);
+		}
+		/* Same termination idiom as the plain loop in search_list */
+		if (!gt4_word_slist_get_next_word (l_impl, l_inst)) break;
+	}
+	return 0;
+}
+
static unsigned int
search_list (AZObject *obj, const char *fname, unsigned int n_mm, unsigned int pm_3, unsigned int min_freq, unsigned int max_freq, int print_all_words)
{
@@ -658,6 +727,7 @@ search_list (AZObject *obj, const char *fname, unsigned int n_mm, unsigned int p
unsigned int code = 0;
qd.dict_impl = (GT4WordDictImplementation *) az_object_get_interface (obj, GT4_TYPE_WORD_DICT, (void **) &qd.dict_inst);
+ qd.slist_impl = (GT4WordSListImplementation *) az_object_get_interface (obj, GT4_TYPE_WORD_SLIST, (void **) &qd.slist_inst);
if (GT4_IS_INDEX_MAP (obj) && locations) {
qd.index_impl = (GT4WordIndexImplementation *) az_object_get_interface (obj, GT4_TYPE_WORD_INDEX, (void **) &qd.index_inst);
}
@@ -688,12 +758,16 @@ search_list (AZObject *obj, const char *fname, unsigned int n_mm, unsigned int p
az_object_shutdown (s_obj);
return GT_INCOMPATIBLE_WORDLENGTH_ERROR;
}
-
- gt4_word_slist_get_first_word (s_impl, s_inst);
- while (s_inst->idx < s_inst->num_words) {
- uint64_t word = s_inst->word;
- search_one_word (&qd, word);
- if (!gt4_word_slist_get_next_word (s_impl, s_inst)) break;
+
+ if (!n_mm) {
+ search_list_zipper (s_impl, s_inst, &qd);
+ } else {
+ gt4_word_slist_get_first_word (s_impl, s_inst);
+ while (s_inst->idx < s_inst->num_words) {
+ uint64_t word = s_inst->word;
+ search_one_word (&qd, word);
+ if (!gt4_word_slist_get_next_word (s_impl, s_inst)) break;
+ }
}
az_object_shutdown (s_obj);
return 0;
@@ -846,12 +920,15 @@ print_gc (AZObject *obj)
unsigned long long word = inst->word;
unsigned int j;
for (j = 0; j < inst->word_length; j++) {
+ //unsigned int n = (unsigned int) (word & 3);
+ //n = (n ^ (n >> 1)) & 1;
+ //if ((n == 1) || (n == 2)) count += inst->count;
count += inst->count * ((word ^ (word >> 1)) & 1);
word = word >> 2;
}
gt4_word_slist_get_next_word (impl, inst);
}
- printf ("GC\t%g\n", (double) count / (inst->num_words * inst->word_length));
+ printf ("GC\t%g\n", (double) count / (inst->sum_counts * inst->word_length));
}
void print_help (int exit_value)
=====================================
src/gmer_counter.c
=====================================
@@ -39,6 +39,9 @@ struct _SNPTable {
unsigned long long *words;
unsigned int *alleles;
/* Stats */
+ /* Number of N-s */
+ unsigned long long n_n;
+ /* Nucleotide length (excludes N) */
unsigned long long n_nucl;
unsigned long long n_gc;
/* Index */
@@ -69,6 +72,9 @@ struct _SNPQueue {
unsigned int n_full_tables;
SNPTable **full_tables;
/* Stats */
+ /* Sequence length (includes N) */
+ unsigned long long n_seq;
+ /* Nucleotide length (excludes N) */
unsigned long long n_nucl;
unsigned long long n_gc;
unsigned long long n_kmers_total;
@@ -86,6 +92,7 @@ static void print_counts (SNPQueue *snpq, GT4GmerDB *db);
static void process (GT4Queue *queue, unsigned int idx, void *arg);
static int start_sequence (GT4FastaReader *reader, void *data);
static int end_sequence (GT4FastaReader *reader, void *data);
+static int read_character (GT4FastaReader *reader, unsigned int ch, void *data);
static int read_nucleotide (GT4FastaReader *reader, unsigned int nucleotide, void *data);
static int read_word (GT4FastaReader *reader, unsigned long long word, void *data);
static int compare_counts (const void *lhs, const void *rhs);
@@ -403,7 +410,8 @@ main (int argc, const char *argv[])
}
if (stats) {
- fprintf (stdout, "#LENGTH\t%llu\n", snpq.n_nucl);
+ fprintf (stdout, "#LENGTH\t%llu\n", snpq.n_seq);
+ fprintf (stdout, "#LENGTH_ACGT\t%llu\n", snpq.n_nucl);
fprintf (stdout, "#GC\t%.3f\n", (double) snpq.n_gc / snpq.n_nucl);
fprintf (stdout, "#TOTAL_KMERS\t%llu\n", snpq.n_kmers_total);
fprintf (stdout, "#LIST_KMERS\t%llu\n", snpq.n_kmers);
@@ -713,11 +721,12 @@ read_file (SNPQueue *snpq, TaskRead *tr)
SNPTable *tbl = snpq->free_tables[--snpq->n_free_tables];
gt4_queue_unlock (&snpq->lmq.queue);
tbl->nwords = 0;
+ tbl->n_n = 0;
tbl->n_nucl = 0;
tbl->n_gc = 0;
tr->data = tbl;
/* if (debug > 0) fprintf (stderr, "Thread %d: reading file %s from %llu\n", idx, tt->_seqfile->path, tf->task_read.reader.cpos); */
- result = fasta_reader_read_nwords (&tr->reader, BLOCK_SIZE, start_sequence, end_sequence, NULL, (stats) ? read_nucleotide : NULL, read_word, tr);
+ result = fasta_reader_read_nwords (&tr->reader, BLOCK_SIZE, start_sequence, end_sequence, (stats) ? read_character : NULL, (stats) ? read_nucleotide : NULL, read_word, tr);
if (result) {
fprintf (stderr, "read_file: Fasta reader %s returned %u\n", tr->reader.id, result);
if (!recover) exit (1);
@@ -756,6 +765,8 @@ process_table (SNPQueue *snpq, TaskTable *tt, unsigned int thread_idx)
gt4_queue_lock (&snpq->lmq.queue);
/* fixme: Create separate task / mutex */
if (stats) {
+ snpq->n_seq += tbl->n_nucl;
+ snpq->n_seq += tbl->n_n;
snpq->n_nucl += tbl->n_nucl;
snpq->n_gc += tbl->n_gc;
snpq->n_kmers_total += tbl->nwords;
@@ -914,6 +925,16 @@ read_nucleotide (GT4FastaReader *reader, unsigned int nucl, void *data)
return 0;
}
+/*
+ * Character callback handed to fasta_reader_read_nwords.
+ * data is the TaskRead whose data member points to the SNPTable being filled;
+ * every 'N' or 'n' increments that table's n_n counter.
+ * reader is unused. Always returns 0.
+ */
+static int
+read_character (GT4FastaReader *reader, unsigned int ch, void *data)
+{
+	SNPTable *table = (SNPTable *) ((TaskRead *) data)->data;
+
+	switch (ch) {
+	case 'N':
+	case 'n':
+		table->n_n += 1;
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
static int
compare_counts (const void *lhs, const void *rhs) {
if (*((unsigned int *) lhs) < *((unsigned int *) rhs)) return -1;
=====================================
src/set-operations.c
=====================================
@@ -127,3 +127,103 @@ gt4_write_union (AZObject *arrays[], unsigned int n_arrays, unsigned int cutoff,
return 0;
}
+
+/*
+ * Merges the word lists of objs and calls callback once per distinct word.
+ *
+ * objs     - n_objs objects providing the GT4_TYPE_WORD_SLIST interface
+ * callback - receives the word, an array of n_objs counts (counts[j] is the
+ *            count of the word in objs[j], 0 if objs[j] did not supply it)
+ *            and data
+ * data     - passed through to callback unchanged
+ *
+ * Returns 1 (via arikkei_return_val_if_fail) if n_objs is 0 or exceeds
+ * GT4_MAX_SETS, the first non-zero callback result if any, otherwise 0.
+ *
+ * The merge picks the smallest current word each round, so it assumes every
+ * list yields words in ascending order - TODO confirm against the slist
+ * interface. The end-of-list behaviour of gt4_word_slist_get_next_word is not
+ * visible here; a list is treated as exhausted when the call returns 0 or
+ * when its idx reaches num_words.
+ */
+unsigned int
+gt4_union (AZObject *objs[], unsigned int n_objs, unsigned int (*callback) (uint64_t, uint32_t *, void *), void *data)
+{
+	GT4WordSListImplementation *impls[GT4_MAX_SETS];
+	GT4WordSListInstance *insts[GT4_MAX_SETS];
+	unsigned char active[GT4_MAX_SETS];
+	uint32_t counts[GT4_MAX_SETS];
+	unsigned int n_active = 0;
+	unsigned long long word;
+	unsigned int j;
+
+	arikkei_return_val_if_fail (n_objs > 0, 1);
+	arikkei_return_val_if_fail (n_objs <= GT4_MAX_SETS, 1);
+
+	/* Slot j always belongs to objs[j], so counts[] columns line up with the caller's list order even when some lists are empty */
+	for (j = 0; j < n_objs; j++) {
+		impls[j] = (GT4WordSListImplementation *) az_object_get_interface (AZ_OBJECT(objs[j]), GT4_TYPE_WORD_SLIST, (void **) &insts[j]);
+		active[j] = 0;
+		if (insts[j]->num_words) {
+			gt4_word_slist_get_first_word (impls[j], insts[j]);
+			active[j] = 1;
+			n_active += 1;
+		}
+	}
+	if (!n_active) return 0;
+
+	/* Find first word */
+	word = 0xffffffffffffffffULL;
+	for (j = 0; j < n_objs; j++) {
+		if (active[j] && (insts[j]->word < word)) word = insts[j]->word;
+	}
+
+	/* Iterate until all lists are exhausted */
+	while (n_active) {
+		unsigned long long next = 0xffffffffffffffffULL;
+		unsigned int result;
+		for (j = 0; j < n_objs; j++) {
+			counts[j] = 0;
+			if (!active[j]) continue;
+			if (insts[j]->word == word) {
+				counts[j] = insts[j]->count;
+				if (!gt4_word_slist_get_next_word (impls[j], insts[j]) || (insts[j]->idx >= insts[j]->num_words)) {
+					/* Exhausted: its last word must not be offered as the next candidate */
+					active[j] = 0;
+					n_active -= 1;
+					continue;
+				}
+			}
+			if (insts[j]->word < next) next = insts[j]->word;
+		}
+		/* Now we have all freqs */
+		result = callback (word, counts, data);
+		if (result) return result;
+		word = next;
+	}
+
+	return 0;
+}
+
+/*
+ * Visits every word of the first list (objs[0]) and calls callback with the
+ * counts of that word in all lists.
+ *
+ * objs     - n_objs objects providing the GT4_TYPE_WORD_SLIST interface
+ * callback - receives the word, an array of n_objs counts (counts[0] is the
+ *            count in objs[0]; counts[j] is the count in objs[j] or 0 if that
+ *            list does not hold the word) and data
+ * data     - passed through to callback unchanged
+ *
+ * Returns 1 (via arikkei_return_val_if_fail) if n_objs is 0 or exceeds
+ * GT4_MAX_SETS, the first non-zero callback result if any, otherwise 0.
+ * Words that occur only in objs[1..] are never reported.
+ *
+ * The other lists are only ever advanced forward, so this assumes every list
+ * yields words in ascending order - TODO confirm against the slist interface.
+ */
+unsigned int
+gt4_is_union (AZObject *objs[], unsigned int n_objs, unsigned int (*callback) (uint64_t, uint32_t *, void *), void *data)
+{
+	GT4WordSListImplementation *impls[GT4_MAX_SETS];
+	GT4WordSListInstance *insts[GT4_MAX_SETS];
+	unsigned int j;
+
+	arikkei_return_val_if_fail (n_objs > 0, 1);
+	arikkei_return_val_if_fail (n_objs <= GT4_MAX_SETS, 1);
+
+	/* Slot j always belongs to objs[j]; empty lists keep their slot so the loops below never read an unset entry */
+	for (j = 0; j < n_objs; j++) {
+		impls[j] = (GT4WordSListImplementation *) az_object_get_interface (AZ_OBJECT(objs[j]), GT4_TYPE_WORD_SLIST, (void **) &insts[j]);
+		if (insts[j]->num_words) {
+			gt4_word_slist_get_first_word (impls[j], insts[j]);
+		}
+	}
+	/* An empty first list has no words to report */
+	if (!insts[0]->num_words) return 0;
+
+	while (insts[0]->idx < insts[0]->num_words) {
+		uint32_t counts[GT4_MAX_SETS];
+		unsigned long long word = insts[0]->word;
+		unsigned int result;
+
+		counts[0] = insts[0]->count;
+		for (j = 1; j < n_objs; j++) {
+			counts[j] = 0;
+			/* Skip words smaller than the current one; stop if the list cannot advance */
+			while ((insts[j]->idx < insts[j]->num_words) && (insts[j]->word < word)) {
+				if (!gt4_word_slist_get_next_word (impls[j], insts[j])) break;
+			}
+			if ((insts[j]->idx < insts[j]->num_words) && (insts[j]->word == word)) {
+				counts[j] = insts[j]->count;
+			}
+		}
+		/* Now we have all freqs */
+		result = callback (word, counts, data);
+		if (result) return result;
+		/* Same termination idiom as the list loop in glistquery's search_list */
+		if (!gt4_word_slist_get_next_word (impls[0], insts[0])) break;
+	}
+
+	return 0;
+}
+
=====================================
src/set-operations.h
=====================================
@@ -33,4 +33,9 @@
unsigned int gt4_write_union (AZObject *arrays[], unsigned int n_arrays, unsigned int cutoff, int ofile, GT4ListHeader *header);
+/* Executes callback for each unique kmer */
+/* If any callback returns not 0, reading stops and result is returned */
+unsigned int gt4_union (AZObject *objs[], unsigned int n_objs, unsigned int (*callback) (uint64_t, uint32_t *, void *), void *data);
+unsigned int gt4_is_union (AZObject *objs[], unsigned int n_objs, unsigned int (*callback) (uint64_t, uint32_t *, void *), void *data);
+
#endif
=====================================
src/utils.c
=====================================
@@ -78,6 +78,7 @@ scout_map (void *arg)
val += scout->cdata[i];
if (!scout->running) break;
}
+ pthread_exit (NULL);
return (void *) val;
}
@@ -93,8 +94,8 @@ void
gt4_delete_scout (GT4Scout *scout)
{
if (!scout->running) return;
- pthread_join (scout->thread, (void **) NULL);
scout->running = 0;
+ pthread_join (scout->thread, (void **) NULL);
}
/* this implementation is based on:
=====================================
src/version.h
=====================================
@@ -26,7 +26,7 @@
#define VERSION_MAJOR 4
#define VERSION_MINOR 2
-#define VERSION_MICRO 7
+#define VERSION_MICRO 16
#define VERSION_QUALIFIER "stable"
#endif
=====================================
src/word-map.c
=====================================
@@ -197,7 +197,7 @@ gt4_word_map_new (const char *listfilename, unsigned int major_version, unsigned
wmap->file_size = csize;
if (hdr->version_minor == 0) {
memcpy (&wmap->header, hdr, sizeof (struct _GT4ListHeader_4_0));
- wmap->header.list_start = sizeof (GT4ListHeader);
+ wmap->header.list_start = sizeof (struct _GT4ListHeader_4_0);
wmap->header.word_bytes = 8;
wmap->header.count_bytes = 4;
} else if (hdr->version_minor <= 2) {
@@ -309,21 +309,3 @@ gt4_word_map_lookup (GT4WordMap *wmap, unsigned long long query)
return 0;
}
-uint64_t
-gt4_word_map_get_word (const GT4WordMap *wmap, uint64_t idx)
-{
- return *((uint64_t *) (wmap->wordlist + 12 * idx));
-}
-
-uint32_t
-gt4_word_map_get_count (const GT4WordMap *wmap, uint64_t idx)
-{
- return *((uint32_t *) (wmap->wordlist + 12 * idx + 8));
-}
-
-uint64_t *
-gt4_word_map_get_word_ptr (const GT4WordMap *wmap, uint64_t idx)
-{
- return (uint64_t *) (wmap->wordlist + 12 * idx);
-}
-
=====================================
src/word-map.h
=====================================
@@ -86,19 +86,19 @@ struct _GT4WordMapClass {
unsigned int gt4_word_map_get_type (void);
-inline extern uint64_t
+static __inline__ uint64_t
gt4_word_map_get_word (const GT4WordMap *wmap, uint64_t idx)
{
return *((uint64_t *) (wmap->wordlist + 12 * idx));
}
-inline extern uint32_t
+static __inline__ uint32_t
gt4_word_map_get_count (const GT4WordMap *wmap, uint64_t idx)
{
return *((uint32_t *) (wmap->wordlist + 12 * idx + 8));
}
-inline extern uint64_t *
+static __inline__ uint64_t *
gt4_word_map_get_word_ptr (const GT4WordMap *wmap, uint64_t idx)
{
return (uint64_t *) (wmap->wordlist + 12 * idx);
View it on GitLab: https://salsa.debian.org/med-team/genometester/-/compare/fefb8db39b02d173b496bb830a1c0f6bc4fe0f1a...4238f5cc1ffbea489fd0022f9b85014dba76066b
--
View it on GitLab: https://salsa.debian.org/med-team/genometester/-/compare/fefb8db39b02d173b496bb830a1c0f6bc4fe0f1a...4238f5cc1ffbea489fd0022f9b85014dba76066b
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20251211/baddaa44/attachment-0001.htm>
More information about the debian-med-commit
mailing list