[med-svn] [Git][med-team/fermi-lite][master] 7 commits: Switch to upstream HEAD using gitmode in watch file

Andreas Tille gitlab at salsa.debian.org
Fri Jan 8 10:14:48 GMT 2021



Andreas Tille pushed to branch master at Debian Med / fermi-lite


Commits:
85d2a97c by Andreas Tille at 2021-01-08T10:42:40+01:00
Switch to upstream HEAD using gitmode in watch file

- - - - -
64754d1a by Andreas Tille at 2021-01-08T10:42:56+01:00
New upstream version 0.1+git20190320.b499514
- - - - -
04b0685c by Andreas Tille at 2021-01-08T10:42:56+01:00
Update upstream source from tag 'upstream/0.1+git20190320.b499514'

Update to upstream version '0.1+git20190320.b499514'
with Debian dir cbae21d2da60fec342db6bc73e89d9da0e7a112b
- - - - -
57245fb8 by Andreas Tille at 2021-01-08T10:46:39+01:00
Update patches

- - - - -
1c85ecbd by Andreas Tille at 2021-01-08T10:47:28+01:00
routine-update: Standards-Version: 4.5.1

- - - - -
5366ecf1 by Andreas Tille at 2021-01-08T10:47:28+01:00
routine-update: debhelper-compat 13

- - - - -
24bfe49e by Andreas Tille at 2021-01-08T10:49:15+01:00
routine-update: Ready to upload to unstable

- - - - -


11 changed files:

- README.md
- bubble.c
- debian/changelog
- debian/control
- debian/patches/rename_bseq1_t.patch
- debian/watch
- example.c
- fml.h
- mag.c
- mag.h
- misc.c


Changes:

=====================================
README.md
=====================================
@@ -18,6 +18,11 @@ is able to retain heterozygous events and thus can be used to assemble diploid
 regions for the purpose of variant calling. It is one of the limited choices
 for local re-assembly and arguably the easiest to interface.
 
+If you use fermi-lite in your work, please cite the FermiKit paper:
+
+> Li H (2015) FermiKit: assembly-based variant calling for Illumina
+> resequencing data, *Bioinformatics*, **31**:3694-6.
+
 ## Usage
 
 For now, see [example.c][example] for the basic use of the library. Here is a
@@ -42,8 +47,9 @@ int main(int argc, char *argv[])
 	return 0;
 }
 ```
-The direct assembly output is in fact a graph. You may have a look at the
-[header file][header] for details.
+The `fml_assemble()` output is in fact a graph. You may have a look at the
+`fml_utg_print_gfa()` function in [misc.c][misc] about how to derive a
+[GFA][gfa] representation from an array of `fml_utg_t` objects.
 
 ## Overview of the Assembly Algorithm
 
@@ -86,3 +92,5 @@ after these procedure is the final output. Sequences in this graph are unitigs.
 [fk]: http://github.com/lh3/fermikit
 [example]: https://github.com/lh3/fermi-lite/blob/master/example.c
 [header]: https://github.com/lh3/fermi-lite/blob/master/fml.h
+[misc]: https://github.com/lh3/fermi-lite/blob/master/misc.c
+[gfa]: https://github.com/pmelsted/GFA-spec


=====================================
bubble.c
=====================================
@@ -16,8 +16,6 @@ typedef khash_t(64) hash64_t;
 #define edge_mark_del(_x) ((_x).x = (uint64_t)-2, (_x).y = 0)
 #define edge_is_del(_x)   ((_x).x == (uint64_t)-2 || (_x).y == 0)
 
-static int fm_verbose = 1;
-
 /******************
  * Closed bubbles *
  ******************/
@@ -178,7 +176,7 @@ void mag_g_simplify_bubble(mag_t *g, int max_vtx, int max_dist)
 	mag_g_merge(g, 0, 0);
 }
 
-int mag_vh_pop_simple(mag_t *g, uint64_t idd, float max_cov, float max_frac, int aggressive)
+int mag_vh_pop_simple(mag_t *g, uint64_t idd, float max_cov, float max_frac, int max_bdiff, int aggressive)
 {
 	magv_t *p = &g->v.a[idd>>1], *q[2];
 	ku128_v *r;
@@ -195,9 +193,10 @@ int mag_vh_pop_simple(mag_t *g, uint64_t idd, float max_cov, float max_frac, int
 		dir[j] = x&1;
 		q[j] = &g->v.a[x>>1];
 		if (q[j]->nei[0].n != 1 || q[j]->nei[1].n != 1) return ret; // no bubble
-		l[j] = q[j]->len - (int)(q[j]->nei[0].a->y + q[j]->nei[1].a->y);
+		l[j] = q[j]->len - (int)(q[j]->nei[0].a->y + q[j]->nei[1].a->y); // bubble length, excluding overlaps
 	}
 	if (q[0]->nei[dir[0]^1].a->x != q[1]->nei[dir[1]^1].a->x) return ret; // no bubble
+	if (l[0] - l[1] > max_bdiff || l[1] - l[0] > max_bdiff) return 1; // huge bubble differences
 	for (j = 0; j < 2; ++j) { // set seq[] and cov[], and compute avg[]
 		if (l[j] > 0) {
 			seq[j] = malloc(l[j]<<1);
@@ -254,16 +253,16 @@ int mag_vh_pop_simple(mag_t *g, uint64_t idd, float max_cov, float max_frac, int
 	return ret;
 }
 
-void mag_g_pop_simple(mag_t *g, float max_cov, float max_frac, int min_merge_len, int aggressive)
+void mag_g_pop_simple(mag_t *g, float max_cov, float max_frac, int min_merge_len, int max_bdiff, int aggressive)
 {
 	int64_t i, n_examined = 0, n_popped = 0;
 	int ret;
 
 	for (i = 0; i < g->v.n; ++i) {
-		ret = mag_vh_pop_simple(g, i<<1|0, max_cov, max_frac, aggressive);
+		ret = mag_vh_pop_simple(g, i<<1|0, max_cov, max_frac, max_bdiff, aggressive);
 		if (ret >= 1) ++n_examined;
 		if (ret >= 2) ++n_popped;
-		ret = mag_vh_pop_simple(g, i<<1|1, max_cov, max_frac, aggressive);
+		ret = mag_vh_pop_simple(g, i<<1|1, max_cov, max_frac, max_bdiff, aggressive);
 		if (ret >= 1) ++n_examined;
 		if (ret >= 2) ++n_popped;
 	}


=====================================
debian/changelog
=====================================
@@ -1,10 +1,16 @@
-fermi-lite (0.1-14) UNRELEASED; urgency=medium
+fermi-lite (0.1+git20190320.b499514-1) unstable; urgency=medium
 
+  [ Michael R. Crusoe ]
   * Team upload.
+  * Switch to upstream HEAD using gitmode in watch file
   * Remove trailing whitespace in debian/changelog (routine-update)
   * Use the libsimde-dev package instead of our code copy
 
- -- Michael R. Crusoe <michael.crusoe at gmail.com>  Mon, 20 Apr 2020 12:54:26 +0200
+  [ Andreas Tille ]
+  * Standards-Version: 4.5.1 (routine-update)
+  * debhelper-compat 13 (routine-update)
+
+ -- Andreas Tille <tille at debian.org>  Fri, 08 Jan 2021 10:47:39 +0100
 
 fermi-lite (0.1-13) unstable; urgency=medium
 


=====================================
debian/control
=====================================
@@ -3,12 +3,12 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.
 Uploaders: Sascha Steinbiss <satta at debian.org>, Nilesh Patra <npatra974 at gmail.com>
 Section: science
 Priority: optional
-Build-Depends: debhelper-compat (= 12),
+Build-Depends: debhelper-compat (= 13),
                d-shlibs,
                zlib1g-dev,
                libsimde-dev,
                help2man
-Standards-Version: 4.5.0
+Standards-Version: 4.5.1
 Vcs-Browser: https://salsa.debian.org/med-team/fermi-lite
 Vcs-Git: https://salsa.debian.org/med-team/fermi-lite.git
 Homepage: https://github.com/lh3/fermi-lite


=====================================
debian/patches/rename_bseq1_t.patch
=====================================
@@ -109,7 +109,7 @@ Last-Update: Thu, 02 Feb 2017 10:57:56 +0100
 --- a/misc.c
 +++ b/misc.c
 @@ -40,7 +40,7 @@ void fml_opt_init(fml_opt_t *opt)
- 	opt->mag_opt.flag = MAG_F_NO_SIMPL;
+ 	opt->mag_opt.flag = MAG_F_NO_SIMPL | MAG_F_POPOPEN;
  }
  
 -void fml_opt_adjust(fml_opt_t *opt, int n_seqs, const bseq1_t *seqs)
@@ -126,7 +126,7 @@ Last-Update: Thu, 02 Feb 2017 10:57:56 +0100
  {
  	mrope_t *mr;
  	kstring_t str = {0,0,0};
-@@ -75,7 +75,7 @@ struct rld_t *fml_fmi_gen(int n, bseq1_t
+@@ -80,7 +80,7 @@ struct rld_t *fml_fmi_gen(int n, bseq1_t
  	mr = mr_init(ROPE_DEF_MAX_NODES, ROPE_DEF_BLOCK_LEN, MR_SO_RCLO);
  	for (k = 0; k < n; ++k) {
  		int i;
@@ -135,7 +135,7 @@ Last-Update: Thu, 02 Feb 2017 10:57:56 +0100
  		if (s->l_seq == 0) continue;
  		free(s->qual);
  		for (i = 0; i < s->l_seq; ++i)
-@@ -116,7 +116,7 @@ struct rld_t *fml_fmi_gen(int n, bseq1_t
+@@ -121,7 +121,7 @@ struct rld_t *fml_fmi_gen(int n, bseq1_t
  	return e;
  }
  
@@ -144,7 +144,7 @@ Last-Update: Thu, 02 Feb 2017 10:57:56 +0100
  {
  	return fml_fmi_gen(n, seq, opt->n_threads > 1? 1 : 0);
  }
-@@ -251,7 +251,7 @@ void fml_utg_destroy(int n, fml_utg_t *u
+@@ -277,7 +277,7 @@ void fml_utg_destroy(int n, fml_utg_t *u
  
  #define MAG_MIN_NSR_COEF .1
  
@@ -158,21 +158,21 @@ Last-Update: Thu, 02 Feb 2017 10:57:56 +0100
 @@ -7,7 +7,7 @@ int main(int argc, char *argv[])
  {
  	fml_opt_t opt;
- 	int c, n_seqs, n_utg;
+ 	int c, n_seqs, n_utg, gfa_out = 0;
 -	bseq1_t *seqs;
 +	fml_seq1_t *seqs;
  	fml_utg_t *utg;
  
  	fml_opt_init(&opt);
-@@ -34,7 +34,7 @@ int main(int argc, char *argv[])
- 		fprintf(stderr, "  -A              discard heterozygotes (apply this to assemble bacterial genomes)\n");
+@@ -41,7 +41,7 @@ int main(int argc, char *argv[])
+ 		fprintf(stderr, "  -g              output the assembly graph in the GFA format\n");
  		return 1;
  	}
 -	seqs = bseq_read(argv[optind], &n_seqs);
 +	seqs = fml_seq_read(argv[optind], &n_seqs);
  	utg = fml_assemble(&opt, n_seqs, seqs, &n_utg);
- 	fml_utg_print(n_utg, utg);
- 	fml_utg_destroy(n_utg, utg);
+ 	if (!gfa_out) fml_utg_print(n_utg, utg);
+ 	else fml_utg_print_gfa(n_utg, utg);
 --- a/internal.h
 +++ b/internal.h
 @@ -12,7 +12,7 @@ extern "C" {
@@ -189,13 +189,13 @@ Last-Update: Thu, 02 Feb 2017 10:57:56 +0100
 @@ -8,7 +8,7 @@
  typedef struct {
  	int32_t l_seq;
- 	char *seq, *qual;
+ 	char *seq, *qual; // NULL-terminated strings; length expected to match $l_seq
 -} bseq1_t;
 +} fml_seq1_t;
  
- #define MAG_F_AGGRESSIVE 0x20
- #define MAG_F_NO_SIMPL   0x80
-@@ -60,7 +60,7 @@ extern "C" {
+ #define MAG_F_AGGRESSIVE 0x20 // pop variant bubbles (not default)
+ #define MAG_F_POPOPEN    0x40 // aggressive tip trimming (default)
+@@ -63,7 +63,7 @@ extern "C" {
   *
   * @return array of sequences
   */
@@ -204,7 +204,7 @@ Last-Update: Thu, 02 Feb 2017 10:57:56 +0100
  
  /**
   * Initialize default parameters
-@@ -79,7 +79,7 @@ void fml_opt_init(fml_opt_t *opt);
+@@ -82,7 +82,7 @@ void fml_opt_init(fml_opt_t *opt);
   *
   * @return array of unitigs
   */
@@ -213,7 +213,7 @@ Last-Update: Thu, 02 Feb 2017 10:57:56 +0100
  
  /**
   * Free unitigs
-@@ -100,7 +100,7 @@ void fml_utg_destroy(int n_utg, fml_utg_
+@@ -103,7 +103,7 @@ void fml_utg_destroy(int n_utg, fml_utg_
   * @param n_seqs    number of sequences
   * @param seqs      array of sequences
   */
@@ -222,7 +222,7 @@ Last-Update: Thu, 02 Feb 2017 10:57:56 +0100
  
  /**
   * Error correction
-@@ -111,8 +111,8 @@ void fml_opt_adjust(fml_opt_t *opt, int
+@@ -114,8 +114,8 @@ void fml_opt_adjust(fml_opt_t *opt, int
   *
   * @return k-mer coverage
   */
@@ -233,9 +233,9 @@ Last-Update: Thu, 02 Feb 2017 10:57:56 +0100
  
  /**
   * Construct FMD-index
-@@ -123,7 +123,7 @@ float fml_fltuniq(const fml_opt_t *opt,
+@@ -126,7 +126,7 @@ float fml_fltuniq(const fml_opt_t *opt,
   *
-  * @return FMD-index
+  * @return FMD-index on success; NULL if all input sequences are zero in length
   */
 -struct rld_t *fml_seq2fmi(const fml_opt_t *opt, int n, bseq1_t *seq);
 +struct rld_t *fml_seq2fmi(const fml_opt_t *opt, int n, fml_seq1_t *seq);
@@ -244,7 +244,7 @@ Last-Update: Thu, 02 Feb 2017 10:57:56 +0100
   * Generate initial overlap graph
 --- a/README.md
 +++ b/README.md
-@@ -29,11 +29,11 @@ sketch of the example:
+@@ -34,11 +34,11 @@ sketch of the example:
  int main(int argc, char *argv[])
  {
  	int i, n_seqs, n_utgs;


=====================================
debian/watch
=====================================
@@ -1,3 +1,7 @@
-version=3
-https://github.com/lh3/fermi-lite/releases .*/archive/v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz)
+version=4
+
+opts="mode=git,pretty=0.1+git%cd.%h" \
+    https://github.com/lh3/fermi-lite.git HEAD
+
+#https://github.com/lh3/fermi-lite/releases .*/archive/v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz)
 


=====================================
example.c
=====================================
@@ -6,17 +6,21 @@
 int main(int argc, char *argv[])
 {
 	fml_opt_t opt;
-	int c, n_seqs, n_utg;
+	int c, n_seqs, n_utg, gfa_out = 0;
 	bseq1_t *seqs;
 	fml_utg_t *utg;
 
 	fml_opt_init(&opt);
-	while ((c = getopt(argc, argv, "Ae:l:r:t:c:")) >= 0) {
+	while ((c = getopt(argc, argv, "gOAe:l:r:t:c:d:v:")) >= 0) {
 		if (c == 'e') opt.ec_k = atoi(optarg);
 		else if (c == 'l') opt.min_asm_ovlp = atoi(optarg);
 		else if (c == 'r') opt.mag_opt.min_dratio1 = atof(optarg);
 		else if (c == 'A') opt.mag_opt.flag |= MAG_F_AGGRESSIVE;
+		else if (c == 'O') opt.mag_opt.flag &= ~MAG_F_POPOPEN;
+		else if (c == 'd') opt.mag_opt.max_bdiff = atoi(optarg);
 		else if (c == 't') opt.n_threads = atoi(optarg);
+		else if (c == 'g') gfa_out = 1;
+		else if (c == 'v') fm_verbose = atoi(optarg);
 		else if (c == 'c') {
 			char *p;
 			opt.min_cnt = strtol(optarg, &p, 10);
@@ -31,12 +35,16 @@ int main(int argc, char *argv[])
 		fprintf(stderr, "  -l INT          min overlap length during initial assembly [%d]\n", opt.min_asm_ovlp);
 		fprintf(stderr, "  -r FLOAT        drop an overlap if its length is below maxOvlpLen*FLOAT [%g]\n", opt.mag_opt.min_dratio1);
 		fprintf(stderr, "  -t INT          number of threads (don't use multi-threading for small data sets) [%d]\n", opt.n_threads);
-		fprintf(stderr, "  -A              discard heterozygotes (apply this to assemble bacterial genomes)\n");
+		fprintf(stderr, "  -d INT          retain a bubble if one side is longer than the other side by >INT-bp [%d]\n", opt.mag_opt.max_bdiff);
+		fprintf(stderr, "  -A              discard heterozygotes (apply this to assemble bacterial genomes; override -O)\n");
+		fprintf(stderr, "  -O              don't apply aggressive tip trimming\n");
+		fprintf(stderr, "  -g              output the assembly graph in the GFA format\n");
 		return 1;
 	}
 	seqs = bseq_read(argv[optind], &n_seqs);
 	utg = fml_assemble(&opt, n_seqs, seqs, &n_utg);
-	fml_utg_print(n_utg, utg);
+	if (!gfa_out) fml_utg_print(n_utg, utg);
+	else fml_utg_print_gfa(n_utg, utg);
 	fml_utg_destroy(n_utg, utg);
 	return 0;
 }


=====================================
fml.h
=====================================
@@ -1,20 +1,21 @@
 #ifndef FML_H
 #define FML_H
 
-#define FML_VERSION "r41"
+#define FML_VERSION "r55"
 
 #include <stdint.h>
 
 typedef struct {
 	int32_t l_seq;
-	char *seq, *qual;
+	char *seq, *qual; // NULL-terminated strings; length expected to match $l_seq
 } bseq1_t;
 
-#define MAG_F_AGGRESSIVE 0x20
-#define MAG_F_NO_SIMPL   0x80
+#define MAG_F_AGGRESSIVE 0x20 // pop variant bubbles (not default)
+#define MAG_F_POPOPEN    0x40 // aggressive tip trimming (default)
+#define MAG_F_NO_SIMPL   0x80 // skip bubble simplification (default)
 
 typedef struct {
-	int flag, min_ovlp, min_elen, min_ensr, min_insr, max_bdist, max_bvtx, min_merge_len, trim_len, trim_depth;
+	int flag, min_ovlp, min_elen, min_ensr, min_insr, max_bdist, max_bdiff, max_bvtx, min_merge_len, trim_len, trim_depth;
 	float min_dratio1, max_bcov, max_bfrac;
 } magopt_t;
 
@@ -31,8 +32,8 @@ struct rld_t;
 struct mag_t;
 
 typedef struct {
-	uint32_t tid;
-	uint32_t len:31, from:1;
+	uint32_t len:31, from:1; // $from and $to: 0 meaning overlapping 5'-end; 1 overlapping 3'-end
+	uint32_t id:31, to:1;    // $id: unitig number
 } fml_ovlp_t;
 
 typedef struct {
@@ -44,6 +45,8 @@ typedef struct {
 	fml_ovlp_t *ovlp; // overlaps, of size n_ovlp[0]+n_ovlp[1]
 } fml_utg_t;
 
+extern int fm_verbose;
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -121,7 +124,7 @@ float fml_fltuniq(const fml_opt_t *opt, int n, bseq1_t *seq);
  * @param n         number of sequences
  * @param seq       array of sequences; FREED on return
  *
- * @return FMD-index
+ * @return FMD-index on success; NULL if all input sequences are zero in length
  */
 struct rld_t *fml_seq2fmi(const fml_opt_t *opt, int n, bseq1_t *seq);
 
@@ -161,6 +164,14 @@ fml_utg_t *fml_mag2utg(struct mag_t *g, int *n_utg);
  */
 void fml_utg_print(int n_utgs, const fml_utg_t *utg);
 
+/**
+ * Output unitig graph in the GFA format
+ *
+ * @param n_utg     number of unitigs
+ * @param utg       array of unitigs
+ */
+void fml_utg_print_gfa(int n, const fml_utg_t *utg);
+
 /**
  * Deallocate an FM-index
  *


=====================================
mag.c
=====================================
@@ -28,7 +28,7 @@ KSORT_INIT_GENERIC(uint64_t)
 #define edge_mark_del(_x) ((_x).x = (uint64_t)-2, (_x).y = 0)
 #define edge_is_del(_x)   ((_x).x == (uint64_t)-2 || (_x).y == 0)
 
-static int fm_verbose = 1;
+int fm_verbose = 1;
 
 /*********************
  * Vector operations *
@@ -553,6 +553,7 @@ void mag_init_opt(magopt_t *o)
 	o->max_bfrac = 0.15;
 	o->max_bvtx = 64;
 	o->max_bdist = 512;
+	o->max_bdiff = 50;
 }
 
 void mag_g_clean(mag_t *g, const magopt_t *opt)
@@ -568,15 +569,15 @@ void mag_g_clean(mag_t *g, const magopt_t *opt)
 	for (j = 2; j <= opt->min_ensr; ++j)
 		mag_g_rm_vext(g, opt->min_elen, j);
 	mag_g_merge(g, 0, opt->min_merge_len);
-	if (opt->flag & MAG_F_AGGRESSIVE) mag_g_pop_open(g, opt->min_elen);
+	if ((opt->flag & MAG_F_AGGRESSIVE) || (opt->flag & MAG_F_POPOPEN)) mag_g_pop_open(g, opt->min_elen);
 	if (!(opt->flag & MAG_F_NO_SIMPL)) mag_g_simplify_bubble(g, opt->max_bvtx, opt->max_bdist);
-	mag_g_pop_simple(g, opt->max_bcov, opt->max_bfrac, opt->min_merge_len, opt->flag & MAG_F_AGGRESSIVE);
+	mag_g_pop_simple(g, opt->max_bcov, opt->max_bfrac, opt->min_merge_len, opt->max_bdiff, opt->flag & MAG_F_AGGRESSIVE);
 	mag_g_rm_vint(g, opt->min_elen, opt->min_insr, g->min_ovlp);
 	mag_g_rm_edge(g, g->min_ovlp, opt->min_dratio1, opt->min_elen, opt->min_ensr);
 	mag_g_merge(g, 1, opt->min_merge_len);
 	mag_g_rm_vext(g, opt->min_elen, opt->min_ensr);
 	mag_g_merge(g, 0, opt->min_merge_len);
-	if (opt->flag & MAG_F_AGGRESSIVE) mag_g_pop_open(g, opt->min_elen);
+	if ((opt->flag & MAG_F_AGGRESSIVE) || (opt->flag & MAG_F_POPOPEN)) mag_g_pop_open(g, opt->min_elen);
 	mag_g_rm_vext(g, opt->min_elen, opt->min_ensr);
 	mag_g_merge(g, 0, opt->min_merge_len);
 }


=====================================
mag.h
=====================================
@@ -49,7 +49,7 @@ extern "C" {
 	void mag_g_rm_edge(mag_t *g, int min_ovlp, double min_ratio, int min_len, int min_nsr);
 	void mag_g_merge(mag_t *g, int rmdup, int min_merge_len);
 	void mag_g_simplify_bubble(mag_t *g, int max_vtx, int max_dist);
-	void mag_g_pop_simple(mag_t *g, float max_cov, float max_frac, int min_merge_len, int aggressive);
+	void mag_g_pop_simple(mag_t *g, float max_cov, float max_frac, int min_merge_len, int max_bdiff, int aggressive);
 	void mag_g_pop_open(mag_t *g, int min_elen);
 	void mag_g_trim_open(mag_t *g, const magopt_t *opt);
 


=====================================
misc.c
=====================================
@@ -37,7 +37,7 @@ void fml_opt_init(fml_opt_t *opt)
 	opt->min_asm_ovlp = 33;
 	opt->min_merge_len = 0;
 	mag_init_opt(&opt->mag_opt);
-	opt->mag_opt.flag = MAG_F_NO_SIMPL;
+	opt->mag_opt.flag = MAG_F_NO_SIMPL | MAG_F_POPOPEN;
 }
 
 void fml_opt_adjust(fml_opt_t *opt, int n_seqs, const bseq1_t *seqs)
@@ -72,6 +72,11 @@ struct rld_t *fml_fmi_gen(int n, bseq1_t *seq, int is_mt)
 	rld_t *e = 0;
 	int k;
 
+	for (k = 0; k < n; ++k)
+		if (seq[k].l_seq > 0)
+			break;
+	if (k == n) return 0;
+
 	mr = mr_init(ROPE_DEF_MAX_NODES, ROPE_DEF_BLOCK_LEN, MR_SO_RCLO);
 	for (k = 0; k < n; ++k) {
 		int i;
@@ -193,7 +198,8 @@ fml_utg_t *fml_mag2utg(struct mag_t *g, int *n)
 				t = &q->ovlp[a++];
 				k = kh_get(64, h, s->x);
 				assert(k != kh_end(h));
-				t->tid = kh_val(h, k);
+				t->id = kh_val(h, k) >> 1;
+				t->to = kh_val(h, k) & 1;
 				t->len = s->y;
 				t->from = from;
 			}
@@ -217,13 +223,13 @@ void fml_utg_print(int n, const fml_utg_t *utg)
 		kputc('\t', &out); kputw(u->nsr, &out);
 		kputc('\t', &out);
 		for (j = 0; j < u->n_ovlp[0]; ++j) {
-			kputw(u->ovlp[j].tid, &out); kputc(',', &out);
+			kputw(u->ovlp[j].id<<1|u->ovlp[j].to, &out); kputc(',', &out);
 			kputw(u->ovlp[j].len, &out); kputc(';', &out);
 		}
 		if (u->n_ovlp[0] == 0) kputc('.', &out);
 		kputc('\t', &out);
 		for (; j < u->n_ovlp[0] + u->n_ovlp[1]; ++j) {
-			kputw(u->ovlp[j].tid, &out); kputc(',', &out);
+			kputw(u->ovlp[j].id<<1|u->ovlp[j].to, &out); kputc(',', &out);
 			kputw(u->ovlp[j].len, &out); kputc(';', &out);
 		}
 		if (u->n_ovlp[1] == 0) kputc('.', &out);
@@ -238,6 +244,26 @@ void fml_utg_print(int n, const fml_utg_t *utg)
 	free(out.s);
 }
 
+void fml_utg_print_gfa(int n, const fml_utg_t *utg)
+{
+	int i, j;
+	FILE *fp = stdout;
+	fputs("H\tVN:Z:1.0\n", fp);
+	for (i = 0; i < n; ++i) {
+		const fml_utg_t *u = &utg[i];
+		fprintf(fp, "S\t%d\t", i);
+		fputs(u->seq, fp);
+		fprintf(fp, "\tLN:i:%d\tRC:i:%d\tPD:Z:", u->len, u->nsr);
+		fputs(u->cov, fp);
+		fputc('\n', fp);
+		for (j = 0; j < u->n_ovlp[0] + u->n_ovlp[1]; ++j) {
+			fml_ovlp_t *o = &u->ovlp[j];
+			if (i < o->id)
+				fprintf(fp, "L\t%d\t%c\t%d\t%c\t%dM\n", i, "+-"[!o->from], o->id, "+-"[o->to], o->len);
+		}
+	}
+}
+
 void fml_utg_destroy(int n, fml_utg_t *utg)
 {
 	int i;
@@ -259,10 +285,12 @@ fml_utg_t *fml_assemble(const fml_opt_t *opt0, int n_seqs, bseq1_t *seqs, int *n
 	fml_opt_t opt = *opt0;
 	float kcov;
 
+	*n_utg = 0;
 	fml_opt_adjust(&opt, n_seqs, seqs);
 	if (opt.ec_k >= 0) fml_correct(&opt, n_seqs, seqs);
 	kcov = fml_fltuniq(&opt, n_seqs, seqs);
 	e = fml_seq2fmi(&opt, n_seqs, seqs);
+	if (e == 0) return 0; // this may happen when all sequences are filtered out
 	g = fml_fmi2mag(&opt, e);
 	opt.mag_opt.min_ensr = opt.mag_opt.min_ensr > kcov * MAG_MIN_NSR_COEF? opt.mag_opt.min_ensr : (int)(kcov * MAG_MIN_NSR_COEF + .499);
 	opt.mag_opt.min_ensr = opt.mag_opt.min_ensr < opt0->max_cnt? opt.mag_opt.min_ensr : opt0->max_cnt;



View it on GitLab: https://salsa.debian.org/med-team/fermi-lite/-/compare/a3135da8ba6e517fb54a0f3950c62653e80783e7...24bfe49e5975666ec6bf6691c37b7ee0cc844aa1

-- 
View it on GitLab: https://salsa.debian.org/med-team/fermi-lite/-/compare/a3135da8ba6e517fb54a0f3950c62653e80783e7...24bfe49e5975666ec6bf6691c37b7ee0cc844aa1
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210108/a8a207b3/attachment-0001.html>


More information about the debian-med-commit mailing list