[med-svn] [Git][med-team/unifrac-tools][upstream] New upstream version 1.2

Andreas Tille (@tille) gitlab at salsa.debian.org
Tue Jan 3 12:47:49 GMT 2023



Andreas Tille pushed to branch upstream at Debian Med / unifrac-tools


Commits:
b18dc326 by Andreas Tille at 2023-01-02T07:47:01+01:00
New upstream version 1.2
- - - - -


14 changed files:

- .github/workflows/main.yml
- README.md
- scripts/install_hpc_sdk.sh
- src/Makefile
- src/api.cpp
- src/api.hpp
- src/biom.cpp
- src/biom.hpp
- src/su.cpp
- src/test_su.cpp
- src/tree.cpp
- src/tree.hpp
- src/unifrac_internal.cpp
- src/unifrac_task.hpp


Changes:

=====================================
.github/workflows/main.yml
=====================================
@@ -13,12 +13,13 @@ jobs:
   build-and-test:
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest]
+        os: [ubuntu-latest, macos-latest, linux-gpu-cuda]
     runs-on: ${{ matrix.os }}
     steps:
     - uses: actions/checkout@v2
     - uses: conda-incubator/setup-miniconda@v2
-      with:        
+      with:
+        miniconda-version: "latest"
         auto-update-conda: true
     - name: Install 
       shell: bash -l {0}
@@ -32,12 +33,15 @@ jobs:
           sudo mv -f /Library/Developer/CommandLineTools /Library/Developer/CommandLineTools.org
         fi
 
+        df -h .
         if [[ "$(uname -s)" == "Linux" ]];
         then
-          conda create --yes -n unifrac -c conda-forge -c bioconda gxx_linux-64 hdf5 mkl-include lz4 zlib hdf5-static libcblas liblapacke make curl
+          conda create -q --yes -n unifrac -c conda-forge -c bioconda gxx_linux-64 hdf5 mkl-include lz4 zlib hdf5-static libcblas liblapacke make curl
         else
-          conda create --yes -n unifrac -c conda-forge -c bioconda clangxx_osx-64 hdf5 mkl-include lz4 hdf5-static libcblas liblapacke make curl
+          conda create -q --yes -n unifrac -c conda-forge -c bioconda clangxx_osx-64 hdf5 mkl-include lz4 hdf5-static libcblas liblapacke make curl
         fi 
+        conda clean --yes -t
+        df -h .
         conda activate unifrac
         echo "$(uname -s)"
         if [[ "$(uname -s)" == "Linux" ]];
@@ -52,19 +56,21 @@ jobs:
         which h5c++
         if [[ "$(uname -s)" == "Linux" ]];
         then
-          ./scripts/install_hpc_sdk.sh
+          ./scripts/install_hpc_sdk.sh </dev/null
           source setup_nv_h5.sh
         fi
+        df -h .
         export PERFORMING_CONDA_BUILD=True
         make api && \
         make main && \
         make install && \
         make test_binaries
+        df -h .
         pushd src
         if [[ "$(uname -s)" == "Linux" ]]; 
         then
           rm -f ~/.R/Makevars
-          conda install --yes -c conda-forge r-base
+          conda install -q --yes -c conda-forge r-base
           unset CXXFLAGS
           unset CFLAGS
           unset DEBUG_CXXFLAGS
@@ -88,6 +94,13 @@ jobs:
         popd
         pushd test
         ./capi_test 1
+        # explicitly check that we do not have hdf5 dependencies
+        if [[ "$(uname -s)" == "Linux" ]];
+        then
+          ldd ./capi_test |awk 'BEGIN{a=0}/hdf/{a=a+1;print $0}END{if (a==0) {print "No dynamic hdf5 found"} else {exit 2}}'
+        else
+          otool -L ./capi_test|awk 'BEGIN{a=0}/hdf/{a=a+1;print $0}END{if (a==0) {print "No dynamic hdf5 found"} else {exit 2}}'
+        fi
         popd
         pushd src/testdata
         conda install --yes -c conda-forge h5py
@@ -96,30 +109,35 @@ jobs:
         ./compare_unifrac_matrix.py test500.weighted_unnormalized_fp32.h5 t1.h5 1.e-5
         ./compare_unifrac_pcoa.py test500.weighted_unnormalized_fp32.h5 t1.h5 3 0.1
         rm -f t1.h5
+        # retry with default precision handling
+        time ssu -m weighted_unnormalized -i test500.biom  -t test500.tre --pcoa 4  -r hdf5 -o t1.h5
+        ./compare_unifrac_matrix.py test500.weighted_unnormalized_fp32.h5 t1.h5 1.e-5
+        ./compare_unifrac_pcoa.py test500.weighted_unnormalized_fp32.h5 t1.h5 3 0.1
+        rm -f t1.h5
         time ssu -f -m weighted_unnormalized_fp32 -i test500.biom  -t test500.tre --pcoa 4  -r hdf5_fp32 -o t1.h5
         # matrix will be different, but PCOA similar
         ./compare_unifrac_pcoa.py test500.weighted_unnormalized_fp32.h5 t1.h5 3 0.1
         rm -f t1.h5
-        time ssu -m weighted_unnormalized -i test500.biom  -t test500.tre --pcoa 4  -r hdf5_fp64 -o t1.h5
+        time ssu -m weighted_unnormalized_fp64 -i test500.biom  -t test500.tre --pcoa 4  -r hdf5_fp64 -o t1.h5
         # minimal precision loss between fp32 and fp64
         ./compare_unifrac_matrix.py test500.weighted_unnormalized_fp32.h5 t1.h5 1.e-5
         ./compare_unifrac_pcoa.py test500.weighted_unnormalized_fp32.h5 t1.h5 3 0.1
         rm -f t1.h5
         # weighted_normalized
-        time ssu -f -m weighted_normalized_fp32 -i test500.biom  -t test500.tre --pcoa 4  -r hdf5_fp32 -o t1.h5
+        time ssu -f -m weighted_normalized -i test500.biom  -t test500.tre --pcoa 4  -r hdf5_fp32 -o t1.h5
         ./compare_unifrac_matrix.py test500.weighted_normalized_fp32.f.h5 t1.h5 1.e-5
         ./compare_unifrac_pcoa.py test500.weighted_normalized_fp32.f.h5 t1.h5 3 0.1
         rm -f t1.h5
-        time ssu -f -m weighted_normalized -i test500.biom  -t test500.tre --pcoa 4  -r hdf5_fp64 -o t1.h5
+        time ssu -f -m weighted_normalized_fp64 -i test500.biom  -t test500.tre --pcoa 4  -r hdf5_fp64 -o t1.h5
         ./compare_unifrac_matrix.py test500.weighted_normalized_fp32.f.h5 t1.h5 1.e-5
         ./compare_unifrac_pcoa.py test500.weighted_normalized_fp32.f.h5 t1.h5 3 0.1
         rm -f t1.h5
         # unweighted
-        time ssu -f -m unweighted_fp32 -i test500.biom  -t test500.tre --pcoa 4  -r hdf5_fp32 -o t1.h5
+        time ssu -f -m unweighted -i test500.biom  -t test500.tre --pcoa 4  -r hdf5_fp32 -o t1.h5
         ./compare_unifrac_matrix.py test500.unweighted_fp32.f.h5 t1.h5 1.e-5
         ./compare_unifrac_pcoa.py test500.unweighted_fp32.f.h5 t1.h5 3 0.1
         rm -f t1.h5
-        time ssu -f -m unweighted -i test500.biom  -t test500.tre --pcoa 4  -r hdf5_fp64 -o t1.h5
+        time ssu -f -m unweighted_fp64 -i test500.biom  -t test500.tre --pcoa 4  -r hdf5_fp64 -o t1.h5
         ./compare_unifrac_matrix.py test500.unweighted_fp32.f.h5 t1.h5 1.e-5
         ./compare_unifrac_pcoa.py test500.unweighted_fp32.f.h5 t1.h5 3 0.1
         rm -f t1.h5


=====================================
README.md
=====================================
@@ -127,16 +127,17 @@ The methods can be used directly through the command line after install:
     $ which ssu
     /Users/<username>/miniconda3/envs/unifrac/bin/ssu
     $ ssu --help
-    usage: ssu -i <biom> -o <out.dm> -m [METHOD] -t <newick> [-n threads] [-a alpha] [-f]  [--vaw]
-        [--mode [MODE]] [--start starting-stripe] [--stop stopping-stripe] [--partial-pattern <glob>]
+    usage: ssu -i <biom> -o <out.dm> -m [METHOD] -t <newick> [-a alpha] [-f]  [--vaw]
+        [--mode MODE] [--start starting-stripe] [--stop stopping-stripe] [--partial-pattern <glob>]
         [--n-partials number_of_partitions] [--report-bare] [--format|-r out-mode]
+        [--n-substeps n] [--pcoa dims] [--diskbuf path]
 
         -i		The input BIOM table.
         -t		The input phylogeny in newick.
         -m		The method, [unweighted | weighted_normalized | weighted_unnormalized | generalized | 
+                                 unweighted_fp64 | weighted_normalized_fp64 | weighted_unnormalized_fp64 | generalized_fp64 |
                                  unweighted_fp32 | weighted_normalized_fp32 | weighted_unnormalized_fp32 | generalized_fp32].
         -o		The output distance matrix.
-        -n		[OPTIONAL] The number of threads, default is 1.
         -a		[OPTIONAL] Generalized UniFrac alpha, default is 1.
         -f		[OPTIONAL] Bypass tips, reduces compute by about 50%.
         --vaw	[OPTIONAL] Variance adjusted, default is to not adjust for variance.
@@ -148,8 +149,9 @@ The methods can be used directly through the command line after install:
         --start	[OPTIONAL] If mode==partial, the starting stripe.
         --stop	[OPTIONAL] If mode==partial, the stopping stripe.
         --partial-pattern	[OPTIONAL] If mode==merge-partial, a glob pattern for partial outputs to merge.
-        --n-partials	[OPTIONAL] If mode==partial-report, the number of partitions to compute.
+        --n-partials 	[OPTIONAL] If mode==partial-report, the number of partitions to compute.
         --report-bare	[OPTIONAL] If mode==partial-report, produce barebones output.
+        --n-substeps 	[OPTIONAL] Internally split the problem in n substeps for reduced memory footprint, default is 1.
         --format|-r	[OPTIONAL]  Output format:
                                  ascii : [DEFAULT] Original ASCII format.
                                  hfd5 : HFD5 format.  May be fp32 or fp64, depending on method.
@@ -157,9 +159,16 @@ The methods can be used directly through the command line after install:
                                  hdf5_fp64 : HFD5 format, using fp64 precision.
         --pcoa	[OPTIONAL] Number of PCoA dimensions to compute (default: 10, do not compute if 0)
         --diskbuf	[OPTIONAL] Use a disk buffer to reduce memory footprint. Provide path to a fast partition (ideally NVMe).
+        -n		[OPTIONAL] DEPRECATED, no-op.
+
+    Environment variables: 
+        CPU parallelism is controlled by OMP_NUM_THREADS. If not defined, all detected core will be used.
+        GPU offload can be disabled with UNIFRAC_USE_GPU=N. By default, if a NVIDIA GPU is detected, it will be used.
+        A specific GPU can be selected with ACC_DEVICE_NUM. If not defined, the first GPU will be used.
 
     Citations: 
         For UniFrac, please see:
+            Sfiligoi et al. mSystems 2022; DOI: 10.1128/msystems.00028-22
             McDonald et al. Nature Methods 2018; DOI: 10.1038/s41592-018-0187-8
             Lozupone and Knight Appl Environ Microbiol 2005; DOI: 10.1128/AEM.71.12.8228-8235.2005
             Lozupone et al. Appl Environ Microbiol 2007; DOI: 10.1128/AEM.01996-06


=====================================
scripts/install_hpc_sdk.sh
=====================================
@@ -41,6 +41,7 @@ if [ "x${NV_URL}" == "x" ]; then
   NV_URL=https://developer.download.nvidia.com/hpc-sdk/21.7/nvhpc_2021_217_Linux_x86_64_cuda_multi.tar.gz
 fi
 
+echo "Downloading the NVIDIA HPC SDK"
 # Defaults to using curl
 # set USE_CURL=N if you want to use aria2 or wget
 if [ "x${USE_CURL}" == "x" ]; then
@@ -51,7 +52,8 @@ if [ "x${USE_CURL}" == "x" ]; then
     tar xpzf nvhpc.tgz
     rm -f nvhpc.tgz
   else
-    curl "${NV_URL}" | tar xpzf -
+    # Do not unpack things we do not use for unifrac
+    curl -s "${NV_URL}" | tar xpzf - --exclude '*libcublas*' --exclude '*libcufft*' --exclude '*libcusparse*' --exclude '*libcusolver*' --exclude '*libcurand*' --exclude '*profilers*' --exclude '*comm_libs*' --exclude '*/doc/*' --exclude '*/plugin*'
   fi
 elif [ "x${USE_ARIA2}" == "x" ]; then
   aria2c "${NV_URL}"
@@ -63,6 +65,8 @@ else
   rm -f nvhpc_*.tar.gz
 fi
 
+echo "Installing NVIDIA HPC SDK"
+
 # must patch the install scripts to find the right gcc
 for f in nvhpc_*/install_components/install nvhpc_*/install_components/*/*/compilers/bin/makelocalrc nvhpc_*/install_components/install_cuda; do
   sed -i -e "s#PATH=/#PATH=$PWD/conda_nv_bins:/#g" $f
@@ -123,4 +127,7 @@ source $PWD/setup_scripts/setup_nv_hpc_bins.sh
 PATH=${PWD}/conda_h5:\$PATH
 EOF
 
+# we don't need the install dir anymore
+rm -fr nvhpc_*
+
 echo "Setup script avaiabile in $PWD/setup_nv_h5.sh"


=====================================
src/Makefile
=====================================
@@ -58,9 +58,9 @@ ifndef NOGPU
 	            ACCCPPFLAGS += -ta=tesla:ccall
 		else
 	            ACCCPPFLAGS += -ta=tesla
+		    # optional info
+		    ACCCPPFLAGS += -Minfo=accel
                 endif
-		# optional info
-		ACCCPPFLAGS += -Minfo=accel
 	        LDDFLAGS += -shlib -acc -Bstatic_pgi
 	        EXEFLAGS += -acc -Bstatic_pgi
 	endif
@@ -79,8 +79,10 @@ else
 endif
 
 ifeq (,$(findstring pgi,$(COMPILER)))
+    ifneq ($(PERFORMING_CONDA_BUILD),True)
 	# basically, not gcc
 	CPPFLAGS += -Wextra -Wno-unused-parameter
+    endif
 endif
 
 BLASLIB=-llapacke -lcblas
@@ -110,7 +112,7 @@ faithpd: faithpd.cpp tree.o biom.o unifrac.o cmd.o skbio_alt.o api.o $(UNIFRAC_F
 	$(CXX) $(CPPFLAGS) $(EXEFLAGS) faithpd.cpp -o faithpd tree.o biom.o $(UNIFRAC_FILES) unifrac.o cmd.o skbio_alt.o api.o -lhdf5_cpp -llz4 $(BLASLIB) -lpthread
 
 libssu.so: tree.o biom.o unifrac.o cmd.o skbio_alt.o api.o $(UNIFRAC_FILES)
-	$(CXX) $(LDDFLAGS) -o libssu.so tree.o biom.o $(UNIFRAC_FILES) unifrac.o cmd.o skbio_alt.o api.o -lc -lhdf5_cpp -llz4 $(BLASLIB) -L$(PREFIX)/lib
+	$(CXX) $(LDDFLAGS) -o libssu.so tree.o biom.o $(UNIFRAC_FILES) unifrac.o cmd.o skbio_alt.o api.o -lc -llz4 $(BLASLIB) -L$(PREFIX)/lib -noshlib -lhdf5_cpp -lhdf5_hl_cpp -lhdf5_hl -lhdf5
 
 api.o: api.cpp api.hpp unifrac.hpp skbio_alt.hpp biom.hpp tree.hpp
 	$(CXX) $(CPPFLAGS) api.cpp -c -o api.o -fPIC


=====================================
src/api.cpp
=====================================
@@ -18,6 +18,9 @@
 #define MMAP_FD_MASK 0x0fff
 #define MMAP_FLAG    0x1000
 
+// Note: Threading is now fully controlled by OpenMP.
+// Any threads variable is really referring to n_substeps.
+// The old naming was retained to minimize code refactoring.
 
 #define SETUP_TDBG(method) const char *tdbg_method=method; \
                           bool print_tdbg = false;\
@@ -40,15 +43,23 @@
                                       return err;                 \
                                   }
 
-#define SET_METHOD(requested_method, err) Method method;                                                       \
-                                          if(std::strcmp(requested_method, "unweighted") == 0)                 \
-                                              method = unweighted;                                             \
-                                          else if(std::strcmp(requested_method, "weighted_normalized") == 0)   \
-                                              method = weighted_normalized;                                    \
-                                          else if(std::strcmp(requested_method, "weighted_unnormalized") == 0) \
-                                              method = weighted_unnormalized;                                  \
-                                          else if(std::strcmp(requested_method, "generalized") == 0)           \
-                                              method = generalized;                                            \
+#define SET_METHOD(requested_method, err) Method method;                                                            \
+                                          if(std::strcmp(requested_method, "unweighted") == 0)                      \
+                                              method = unweighted_fp32;                                             \
+                                          else if(std::strcmp(requested_method, "weighted_normalized") == 0)        \
+                                              method = weighted_normalized_fp32;                                    \
+                                          else if(std::strcmp(requested_method, "weighted_unnormalized") == 0)      \
+                                              method = weighted_unnormalized_fp32;                                  \
+                                          else if(std::strcmp(requested_method, "generalized") == 0)                \
+                                              method = generalized_fp32;                                            \
+                                          else if(std::strcmp(requested_method, "unweighted_fp64") == 0)            \
+                                              method = unweighted;                                                  \
+                                          else if(std::strcmp(requested_method, "weighted_normalized_fp64") == 0)   \
+                                              method = weighted_normalized;                                         \
+                                          else if(std::strcmp(requested_method, "weighted_unnormalized_fp64") == 0) \
+                                              method = weighted_unnormalized;                                       \
+                                          else if(std::strcmp(requested_method, "generalized_fp64") == 0)           \
+                                              method = generalized;                                                 \
                                           else if(std::strcmp(requested_method, "unweighted_fp32") == 0)            \
                                               method = unweighted_fp32;                                             \
                                           else if(std::strcmp(requested_method, "weighted_normalized_fp32") == 0)   \
@@ -163,10 +174,12 @@ void initialize_mat_no_biom(mat_t* &result, char** sample_ids, unsigned int n_sa
 }
 
 inline compute_status is_fp64_method(const std::string &method_string, bool &fp64) {
-    if ((method_string=="unweighted_fp32") || (method_string=="weighted_normalized_fp32") || (method_string=="weighted_unnormalized_fp32") || (method_string=="generalized_fp32")) {
+    if ((method_string=="unweighted") || (method_string=="weighted_normalized") || (method_string=="weighted_unnormalized") || (method_string=="generalized")) {
         fp64 = false;
-    } else if ((method_string=="unweighted") || (method_string=="weighted_normalized") || (method_string=="weighted_unnormalized") || (method_string=="generalized")) {
+    } else if ((method_string=="unweighted_fp64") || (method_string=="weighted_normalized_fp64") || (method_string=="weighted_unnormalized_fp64") || (method_string=="generalized_fp64")) {
        fp64 = true;
+    } else if ((method_string=="unweighted_fp32") || (method_string=="weighted_normalized_fp32") || (method_string=="weighted_unnormalized_fp32") || (method_string=="generalized_fp32")) {
+       fp64 = false;
     } else {
         return unknown_method;
     }
@@ -358,7 +371,7 @@ void set_tasks(std::vector<su::task_parameters> &tasks,
                unsigned int stripe_start,
                unsigned int stripe_stop,
                bool bypass_tips,
-               unsigned int nthreads) {
+               unsigned int n_tasks) {
 
     // compute from start to the max possible stripe if stop doesn't make sense
     if(stripe_stop <= stripe_start)
@@ -369,16 +382,16 @@ void set_tasks(std::vector<su::task_parameters> &tasks,
      *
      * we use the remaining the chunksize for bins which cannot be full maximally
      */
-    unsigned int fullchunk = ((stripe_stop - stripe_start) + nthreads - 1) / nthreads;  // this computes the ceiling
-    unsigned int smallchunk = (stripe_stop - stripe_start) / nthreads;
+    unsigned int fullchunk = ((stripe_stop - stripe_start) + n_tasks - 1) / n_tasks;  // this computes the ceiling
+    unsigned int smallchunk = (stripe_stop - stripe_start) / n_tasks;
 
-    unsigned int n_fullbins = (stripe_stop - stripe_start) % nthreads;
+    unsigned int n_fullbins = (stripe_stop - stripe_start) % n_tasks;
     if(n_fullbins == 0)
-        n_fullbins = nthreads;
+        n_fullbins = n_tasks;
 
     unsigned int start = stripe_start;
 
-    for(unsigned int tid = 0; tid < nthreads; tid++) {
+    for(unsigned int tid = 0; tid < n_tasks; tid++) {
         tasks[tid].tid = tid;
         tasks[tid].start = start; // stripe start
         tasks[tid].bypass_tips = bypass_tips;
@@ -1463,7 +1476,7 @@ MergeStatus validate_partial(const partial_dyn_mat_t* const * partial_mats, int
 }
 
 
-MergeStatus merge_partial(partial_mat_t** partial_mats, int n_partials, unsigned int nthreads, mat_t** result) {
+MergeStatus merge_partial(partial_mat_t** partial_mats, int n_partials, unsigned int dummy, mat_t** result) {
     MergeStatus err = check_partial(partial_mats, n_partials, false);
     if (err!=merge_okay) return err;
 


=====================================
src/api.hpp
=====================================
@@ -171,7 +171,7 @@ EXTERN void destroy_results_vec(r_vec** result);
  * variance_adjust <bool> whether to apply variance adjustment.
  * alpha <double> GUniFrac alpha, only relevant if method == generalized.
  * bypass_tips <bool> disregard tips, reduces compute by about 50%
- * threads <uint> the number of threads to use.
+ * n_substeps <uint> the number of substeps to use.
  * result <mat_t**> the resulting distance matrix in condensed form, this is initialized within the method so using **
  *
  * one_off returns the following error codes:
@@ -184,7 +184,7 @@ EXTERN void destroy_results_vec(r_vec** result);
  */
 EXTERN ComputeStatus one_off(const char* biom_filename, const char* tree_filename,
                              const char* unifrac_method, bool variance_adjust, double alpha,
-                             bool bypass_tips, unsigned int threads, mat_t** result);
+                             bool bypass_tips, unsigned int n_substeps, mat_t** result);
 
 
 /* Compute UniFrac - against in-memory objects returning full form matrix
@@ -195,7 +195,7 @@ EXTERN ComputeStatus one_off(const char* biom_filename, const char* tree_filenam
  * variance_adjust <bool> whether to apply variance adjustment.
  * alpha <double> GUniFrac alpha, only relevant if method == generalized.
  * bypass_tips <bool> disregard tips, reduces compute by about 50%
- * threads <uint> the number of threads to use.
+ * n_substeps <uint> the number of substeps to use.
  * result <mat_full_fp64_t**> the resulting distance matrix in full form, this is initialized within the method so using **
  *
  * one_off_inmem returns the following error codes:
@@ -206,7 +206,7 @@ EXTERN ComputeStatus one_off(const char* biom_filename, const char* tree_filenam
  */
 EXTERN ComputeStatus one_off_inmem(const support_biom_t *table_data, const support_bptree_t *tree_data,
                                    const char* unifrac_method, bool variance_adjust, double alpha,
-                                   bool bypass_tips, unsigned int threads, mat_full_fp64_t** result);
+                                   bool bypass_tips, unsigned int n_substeps, mat_full_fp64_t** result);
 
 /* Compute UniFrac - against in-memory objects returning full form matrix, fp32
  *
@@ -216,7 +216,7 @@ EXTERN ComputeStatus one_off_inmem(const support_biom_t *table_data, const suppo
  * variance_adjust <bool> whether to apply variance adjustment.
  * alpha <double> GUniFrac alpha, only relevant if method == generalized.
  * bypass_tips <bool> disregard tips, reduces compute by about 50%
- * threads <uint> the number of threads to use.
+ * n_substeps <uint> the number of substeps to use.
  * result <mat_full_fp32_t**> the resulting distance matrix in full form, this is initialized within the method so using **
  *
  * one_off_inmem returns the following error codes:
@@ -227,7 +227,7 @@ EXTERN ComputeStatus one_off_inmem(const support_biom_t *table_data, const suppo
  */
 EXTERN ComputeStatus one_off_inmem_fp32(const support_biom_t *table_data, const support_bptree_t *tree_data,
                                         const char* unifrac_method, bool variance_adjust, double alpha,
-                                        bool bypass_tips, unsigned int threads, mat_full_fp32_t** result);
+                                        bool bypass_tips, unsigned int n_substeps, mat_full_fp32_t** result);
 
 /* Compute UniFrac - matrix form
  *
@@ -237,7 +237,7 @@ EXTERN ComputeStatus one_off_inmem_fp32(const support_biom_t *table_data, const
  * variance_adjust <bool> whether to apply variance adjustment.
  * alpha <double> GUniFrac alpha, only relevant if method == generalized.
  * bypass_tips <bool> disregard tips, reduces compute by about 50%
- * threads <uint> the number of threads/blocks to use.
+ * n_substeps <uint> the number of substeps/blocks to use.
  * mmap_dir <const char*> If not NULL, area to use for temp memory storage
  * result <mat_full_fp64_t**> the resulting distance matrix in matrix form, this is initialized within the method so using **
  *
@@ -251,7 +251,7 @@ EXTERN ComputeStatus one_off_inmem_fp32(const support_biom_t *table_data, const
  */
 EXTERN ComputeStatus one_off_matrix(const char* biom_filename, const char* tree_filename,
                                     const char* unifrac_method, bool variance_adjust, double alpha,
-                                    bool bypass_tips, unsigned int nthreads,
+                                    bool bypass_tips, unsigned int n_substeps,
                                     const char *mmap_dir,
                                     mat_full_fp64_t** result);
 
@@ -263,7 +263,7 @@ EXTERN ComputeStatus one_off_matrix(const char* biom_filename, const char* tree_
  * variance_adjust <bool> whether to apply variance adjustment.
  * alpha <double> GUniFrac alpha, only relevant if method == generalized.
  * bypass_tips <bool> disregard tips, reduces compute by about 50%
- * threads <uint> the number of threads/blocks to use.
+ * n_substeps <uint> the number of substeps/blocks to use.
  * mmap_dir <const char*> If not NULL, area to use for temp memory storage
  * result <mat_full_fp32_t**> the resulting distance matrix in matrix form, this is initialized within the method so using **
  *
@@ -277,7 +277,7 @@ EXTERN ComputeStatus one_off_matrix(const char* biom_filename, const char* tree_
  */
 EXTERN ComputeStatus one_off_matrix_fp32(const char* biom_filename, const char* tree_filename,
                                          const char* unifrac_method, bool variance_adjust, double alpha,
-                                         bool bypass_tips, unsigned int nthreads,
+                                         bool bypass_tips, unsigned int n_substeps,
                                          const char *mmap_dir,
                                          mat_full_fp32_t** result);
 
@@ -306,7 +306,7 @@ EXTERN ComputeStatus faith_pd_one_off(const char* biom_filename, const char* tre
  * variance_adjust <bool> whether to apply variance adjustment.
  * alpha <double> GUniFrac alpha, only relevant if method == generalized.
  * bypass_tips <bool> disregard tips, reduces compute by about 50%
- * threads <uint> the number of threads to use.
+ * n_substeps <uint> the number of substeps to use.
  * format <const char*> output format to use.
  * pcoa_dims <uint> if not 0, number of dimensions to use or PCoA
  * mmap_dir <const char*> if not empty, temp dir to use for disk-based memory 
@@ -322,7 +322,7 @@ EXTERN ComputeStatus faith_pd_one_off(const char* biom_filename, const char* tre
  */
 EXTERN ComputeStatus unifrac_to_file(const char* biom_filename, const char* tree_filename, const char* out_filename,
                                      const char* unifrac_method, bool variance_adjust, double alpha,
-                                     bool bypass_tips, unsigned int threads, const char* format,
+                                     bool bypass_tips, unsigned int n_substeps, const char* format,
                                      unsigned int pcoa_dims, const char *mmap_dir);
 
 /* Write a matrix object
@@ -429,7 +429,7 @@ EXTERN IOStatus write_vec(const char* filename, r_vec* result);
  * variance_adjust <bool> whether to apply variance adjustment.
  * alpha <double> GUniFrac alpha, only relevant if method == generalized.
  * bypass_tips <bool> disregard tips, reduces compute by about 50%
- * threads <uint> the number of threads to use.
+ * n_substeps <uint> the number of substeps to use.
  * stripe_start <uint> the starting stripe to compute
  * stripe_stop <uint> the last stripe to compute
  * dm_stripes <vector of double*> the unique branch length stripes. This is expected to be
@@ -448,7 +448,7 @@ EXTERN IOStatus write_vec(const char* filename, r_vec* result);
 
 EXTERN ComputeStatus partial(const char* biom_filename, const char* tree_filename,
                              const char* unifrac_method, bool variance_adjust, double alpha,
-                             bool bypass_tips, unsigned int threads, unsigned int stripe_start,
+                             bool bypass_tips, unsigned int n_substeps, unsigned int stripe_start,
                              unsigned int stripe_stop, partial_mat_t** result);
 
 /* Write a partial matrix object
@@ -558,7 +558,7 @@ EXTERN MergeStatus validate_partial(const partial_dyn_mat_t* const * partial_mat
  * sample_id_consistency : samples described by stripes are inconsistent
  * square_mismatch       : inconsistency on denotation of square matrix
  */
-EXTERN MergeStatus merge_partial(partial_mat_t** partial_mats, int n_partials, unsigned int nthreads, mat_t** result);
+EXTERN MergeStatus merge_partial(partial_mat_t** partial_mats, int n_partials, unsigned int dummy, mat_t** result);
 
 /* Merge partial results
  *
@@ -651,6 +651,6 @@ void set_tasks(std::vector<su::task_parameters> &tasks,
                unsigned int stripe_start,
                unsigned int stripe_stop,
                bool bypass_tips,
-               unsigned int nthreads);
+               unsigned int n_tasks);
 
 #endif


=====================================
src/biom.cpp
=====================================
@@ -127,8 +127,8 @@ biom::biom() : has_hdf5_backing(false) {
 }
 
 // not using const on indices/indptr/data as the pointers are being borrowed
-biom::biom(char** obs_ids_in,
-           char** samp_ids_in,
+biom::biom(const char* const * obs_ids_in,
+          const  char* const * samp_ids_in,
            uint32_t* indices,
            uint32_t* indptr,
            double* data,


=====================================
src/biom.hpp
=====================================
@@ -41,8 +41,8 @@ namespace su {
              * @param n_samples number of samples
              * @param nnz number of data points
              */
-            biom(char** obs_ids,
-                 char** samp_ids,
+            biom(const char* const * obs_ids,
+                 const char* const * samp_ids,
                  uint32_t* index,
                  uint32_t* indptr,
                  double* data,


=====================================
src/su.cpp
=====================================
@@ -13,15 +13,17 @@
 enum Format {format_invalid,format_ascii, format_hdf5_fp32, format_hdf5_fp64};
 
 void usage() {
-    std::cout << "usage: ssu -i <biom> -o <out.dm> -m [METHOD] -t <newick> [-n threads] [-a alpha] [-f]  [--vaw]" << std::endl;
-    std::cout << "    [--mode [MODE]] [--start starting-stripe] [--stop stopping-stripe] [--partial-pattern <glob>]" << std::endl;
+    std::cout << "usage: ssu -i <biom> -o <out.dm> -m [METHOD] -t <newick> [-a alpha] [-f]  [--vaw]" << std::endl;
+    std::cout << "    [--mode MODE] [--start starting-stripe] [--stop stopping-stripe] [--partial-pattern <glob>]" << std::endl;
     std::cout << "    [--n-partials number_of_partitions] [--report-bare] [--format|-r out-mode]" << std::endl;
+    std::cout << "    [--n-substeps n] [--pcoa dims] [--diskbuf path]" << std::endl;
     std::cout << std::endl;
     std::cout << "    -i\t\tThe input BIOM table." << std::endl;
     std::cout << "    -t\t\tThe input phylogeny in newick." << std::endl;
-    std::cout << "    -m\t\tThe method, [unweighted | weighted_normalized | weighted_unnormalized | generalized | unweighted_fp32 | weighted_normalized_fp32 | weighted_unnormalized_fp32 | generalized_fp32]." << std::endl;
+    std::cout << "    -m\t\tThe method, [unweighted | weighted_normalized | weighted_unnormalized | generalized |" << std::endl;
+    std::cout << "                       unweighted_fp64 | weighted_normalized_fp64 | weighted_unnormalized_fp64 | generalized_fp64 |" << std::endl;
+    std::cout << "                       unweighted_fp32 | weighted_normalized_fp32 | weighted_unnormalized_fp32 | generalized_fp32]." << std::endl;
     std::cout << "    -o\t\tThe output distance matrix." << std::endl;
-    std::cout << "    -n\t\t[OPTIONAL] The number of threads, default is 1." << std::endl;
     std::cout << "    -a\t\t[OPTIONAL] Generalized UniFrac alpha, default is 1." << std::endl;
     std::cout << "    -f\t\t[OPTIONAL] Bypass tips, reduces compute by about 50%." << std::endl;
     std::cout << "    --vaw\t[OPTIONAL] Variance adjusted, default is to not adjust for variance." << std::endl;
@@ -36,6 +38,7 @@ void usage() {
     std::cout << "    --partial-pattern\t[OPTIONAL] If mode==merge-partial or check-partial, a glob pattern for partial outputs to merge." << std::endl;
     std::cout << "    --n-partials\t[OPTIONAL] If mode==partial-report, the number of partitions to compute." << std::endl;
     std::cout << "    --report-bare\t[OPTIONAL] If mode==partial-report, produce barebones output." << std::endl;
+    std::cout << "    --n-substeps\t[OPTIONAL] Internally split the problem in n substeps for reduced memory footprint, default is 1." << std::endl;
     std::cout << "    --format|-r\t[OPTIONAL]  Output format:" << std::endl;
     std::cout << "    \t\t    ascii : [DEFAULT] Original ASCII format." << std::endl;
     std::cout << "    \t\t    hfd5 : HFD5 format.  May be fp32 or fp64, depending on method." << std::endl;
@@ -43,9 +46,16 @@ void usage() {
     std::cout << "    \t\t    hdf5_fp64 : HFD5 format, using fp64 precision." << std::endl;
     std::cout << "    --pcoa\t[OPTIONAL] Number of PCoA dimensions to compute (default: 10, do not compute if 0)" << std::endl;
     std::cout << "    --diskbuf\t[OPTIONAL] Use a disk buffer to reduce memory footprint. Provide path to a fast partition (ideally NVMe)." << std::endl;
+    std::cout << "    -n\t\t[OPTIONAL] DEPRECATED, no-op." << std::endl;
+    std::cout << std::endl;
+    std::cout << "Environment variables: " << std::endl;
+    std::cout << "    CPU parallelism is controlled by OMP_NUM_THREADS. If not defined, all detected core will be used." << std::endl;
+    std::cout << "    GPU offload can be disabled with UNIFRAC_USE_GPU=N. By default, if a NVIDIA GPU is detected, it will be used." << std::endl;
+    std::cout << "    A specific GPU can be selected with ACC_DEVICE_NUM. If not defined, the first GPU will be used." << std::endl;
     std::cout << std::endl;
     std::cout << "Citations: " << std::endl;
     std::cout << "    For UniFrac, please see:" << std::endl;
+    std::cout << "        Sfiligoi et al. mSystems 2022; DOI: 10.1128/msystems.00028-22" << std::endl;
     std::cout << "        McDonald et al. Nature Methods 2018; DOI: 10.1038/s41592-018-0187-8" << std::endl;
     std::cout << "        Lozupone and Knight Appl Environ Microbiol 2005; DOI: 10.1128/AEM.71.12.8228-8235.2005" << std::endl;
     std::cout << "        Lozupone et al. Appl Environ Microbiol 2007; DOI: 10.1128/AEM.01996-06" << std::endl;
@@ -296,7 +306,7 @@ int mode_check_partial(const std::string &partial_pattern) {
 int mode_partial(std::string table_filename, std::string tree_filename, 
                  std::string output_filename, std::string method_string,
                  bool vaw, double g_unifrac_alpha, bool bypass_tips, 
-                 unsigned int nthreads, int start_stripe, int stop_stripe) {
+                 unsigned int nsubsteps, int start_stripe, int stop_stripe) {
     if(output_filename.empty()) {
         err("output filename missing");
         return EXIT_FAILURE;
@@ -329,7 +339,7 @@ int mode_partial(std::string table_filename, std::string tree_filename,
     partial_mat_t *result = NULL;
     compute_status status;
     status = partial(table_filename.c_str(), tree_filename.c_str(), method_string.c_str(), 
-                     vaw, g_unifrac_alpha, bypass_tips, nthreads, start_stripe, stop_stripe, &result);
+                     vaw, g_unifrac_alpha, bypass_tips, nsubsteps, start_stripe, stop_stripe, &result);
     if(status != okay || result == NULL) {
         fprintf(stderr, "Compute failed in partial: %s\n", compute_status_messages[status]);
         exit(EXIT_FAILURE);
@@ -350,7 +360,7 @@ int mode_one_off(const std::string &table_filename, const std::string &tree_file
                  const std::string &output_filename, const std::string &format_str, Format format_val, 
                  const std::string &method_string, unsigned int pcoa_dims,
                  bool vaw, double g_unifrac_alpha, bool bypass_tips,
-                 unsigned int nthreads, const std::string &mmap_dir) {
+                 unsigned int nsubsteps, const std::string &mmap_dir) {
     if(output_filename.empty()) {
         err("output filename missing");
         return EXIT_FAILURE;
@@ -376,7 +386,7 @@ int mode_one_off(const std::string &table_filename, const std::string &tree_file
       mat_t *result = NULL;
 
       status = one_off(table_filename.c_str(), tree_filename.c_str(), method_string.c_str(), 
-                       vaw, g_unifrac_alpha, bypass_tips, nthreads, &result);
+                       vaw, g_unifrac_alpha, bypass_tips, nsubsteps, &result);
       if(status != okay || result == NULL) {
         fprintf(stderr, "Compute failed in one_off: %s\n", compute_status_messages[status]);
         exit(EXIT_FAILURE);
@@ -394,7 +404,7 @@ int mode_one_off(const std::string &table_filename, const std::string &tree_file
       const char * mmap_dir_c = mmap_dir.empty() ? NULL : mmap_dir.c_str();
 
       status = unifrac_to_file(table_filename.c_str(), tree_filename.c_str(), output_filename.c_str(),
-                               method_string.c_str(), vaw, g_unifrac_alpha, bypass_tips, nthreads, format_str.c_str(),
+                               method_string.c_str(), vaw, g_unifrac_alpha, bypass_tips, nsubsteps, format_str.c_str(),
                                pcoa_dims, mmap_dir_c);
 
       if (status != okay) {
@@ -422,10 +432,10 @@ Format get_format(const std::string &format_string, const std::string &method_st
     } else if (format_string == "hdf5_fp64") {
         format_val = format_hdf5_fp64;
     } else if (format_string == "hdf5") {
-        if ((method_string=="unweighted_fp32") || (method_string=="weighted_normalized_fp32") || (method_string=="weighted_unnormalized_fp32") || (method_string=="generalized_fp32"))
-           format_val = format_hdf5_fp32;
-        else
+        if ((method_string=="unweighted_fp64") || (method_string=="weighted_normalized_fp64") || (method_string=="weighted_unnormalized_fp64") || (method_string=="generalized_fp64"))
            format_val = format_hdf5_fp64;
+        else
+           format_val = format_hdf5_fp32;
     }
 
     return format_val;
@@ -439,12 +449,14 @@ int main(int argc, char **argv){
         return EXIT_SUCCESS;
     }
 
-    unsigned int nthreads;
+    unsigned int nsubsteps;
     std::string table_filename = input.getCmdOption("-i");
     std::string tree_filename = input.getCmdOption("-t");
     std::string output_filename = input.getCmdOption("-o");
     std::string method_string = input.getCmdOption("-m");
-    std::string nthreads_arg = input.getCmdOption("-n");
+    // deprecated, but we still want to support it, even as a no-op
+    std::string nold_arg = input.getCmdOption("-n");
+    std::string nsubsteps_arg = input.getCmdOption("--n-substeps");
     std::string gunifrac_arg = input.getCmdOption("-a");
     std::string mode_arg = input.getCmdOption("--mode");
     std::string start_arg = input.getCmdOption("--start");
@@ -457,10 +469,10 @@ int main(int argc, char **argv){
     std::string pcoa_arg = input.getCmdOption("--pcoa");
     std::string diskbuf_arg = input.getCmdOption("--diskbuf");
 
-    if(nthreads_arg.empty()) {
-        nthreads = 1;
+    if(nsubsteps_arg.empty()) {
+        nsubsteps = 1;
     } else {
-        nthreads = atoi(nthreads_arg.c_str());
+        nsubsteps = atoi(nsubsteps_arg.c_str());
     }
     
     bool vaw = input.cmdOptionExists("--vaw"); 
@@ -521,9 +533,9 @@ int main(int argc, char **argv){
 
 
     if(mode_arg.empty() || mode_arg == "one-off")
-        return mode_one_off(table_filename, tree_filename, output_filename, format_arg, format_val, method_string, pcoa_dims, vaw, g_unifrac_alpha, bypass_tips, nthreads, diskbuf_arg);
+        return mode_one_off(table_filename, tree_filename, output_filename, format_arg, format_val, method_string, pcoa_dims, vaw, g_unifrac_alpha, bypass_tips, nsubsteps, diskbuf_arg);
     else if(mode_arg == "partial")
-        return mode_partial(table_filename, tree_filename, output_filename, method_string, vaw, g_unifrac_alpha, bypass_tips, nthreads, start_stripe, stop_stripe);
+        return mode_partial(table_filename, tree_filename, output_filename, method_string, vaw, g_unifrac_alpha, bypass_tips, nsubsteps, start_stripe, stop_stripe);
     else if(mode_arg == "merge-partial")
         return mode_merge_partial(output_filename, format_val, pcoa_dims, partial_pattern, diskbuf_arg);
     else if(mode_arg == "check-partial")


=====================================
src/test_su.cpp
=====================================
@@ -509,8 +509,8 @@ void test_biom_constructor_from_sparse() {
     uint32_t index[] = {2, 0, 1, 3, 4, 5, 2, 3, 5, 0, 1, 2, 5, 1, 2};
     uint32_t indptr[] = {0,  1,  6,  9, 13, 15};
     double data[] = {1., 5., 1., 2., 3., 1., 1., 4., 2., 2., 1., 1., 1., 1., 1.};
-    char* obs_ids[] = {"GG_OTU_1", "GG_OTU_2", "GG_OTU_3", "GG_OTU_4", "GG_OTU_5"};
-    char* samp_ids[] = {"Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6"};
+    const char* obs_ids[] = {"GG_OTU_1", "GG_OTU_2", "GG_OTU_3", "GG_OTU_4", "GG_OTU_5"};
+    const char* samp_ids[] = {"Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6"};
 
     su::biom table = su::biom(obs_ids, samp_ids, index, indptr, data, 5, 6, 15);
     _exercise_get_obs_data(table);
@@ -1838,7 +1838,7 @@ void test_bptree_cstyle_constructor() {
                                 //11101000
     bool structure[] = {true, true, true, false, true, false, false, false};
     double lengths[] = {0, 0, 1, 0, 2, 0, 0, 0};
-    char* names[] = {"", "c", "123:foo; bar", "", "b", "", "", ""};
+    const char* names[] = {"", "c", "123:foo; bar", "", "b", "", "", ""};
     su::BPTree tree = su::BPTree(structure, lengths, names, 8);
 
     unsigned int exp_nparens = 8;


=====================================
src/tree.cpp
=====================================
@@ -52,7 +52,7 @@ BPTree::BPTree(std::vector<bool> input_structure, std::vector<double> input_leng
     index_and_cache();
 }
 
-BPTree::BPTree(const bool* input_structure, const double* input_lengths, char** input_names, const int n_parens) {
+BPTree::BPTree(const bool* input_structure, const double* input_lengths, const char* const * input_names, const int n_parens) {
     structure = std::vector<bool>();
     lengths = std::vector<double>();
     names = std::vector<std::string>();


=====================================
src/tree.hpp
=====================================
@@ -51,7 +51,7 @@ namespace su {
              * @param input_names A char* array of the names
              * @param n_parens The length of the topology
              */
-            BPTree(const bool* input_structure, const double* input_lengths, char** input_names, const int n_parens);
+            BPTree(const bool* input_structure, const double* input_lengths, const char* const * input_names, const int n_parens);
 
             /* postorder tree traversal
              *


=====================================
src/unifrac_internal.cpp
=====================================
@@ -29,10 +29,11 @@ static int sync_printf(const char *format, ...) {
     va_start(args, format);
 
     pthread_mutex_lock(&printf_mutex);
-    vprintf(format, args);
+    int cnt = vprintf(format, args);
     pthread_mutex_unlock(&printf_mutex);
 
     va_end(args);
+    return cnt;
 }
 
 static void sig_handler(int signo) {


=====================================
src/unifrac_task.hpp
=====================================
@@ -53,22 +53,23 @@ namespace SUCMP_NM {
 
     public:
       const unsigned int start_idx;
+      const unsigned int stop_idx;
       const unsigned int n_samples;
       const uint64_t  n_samples_r;
       TFloat* const buf;
 
       UnifracTaskVector(std::vector<double*> &_dm_stripes, const su::task_parameters* _task_p)
       : dm_stripes(_dm_stripes), task_p(_task_p)
-      , start_idx(task_p->start), n_samples(task_p->n_samples)
+      , start_idx(task_p->start), stop_idx(task_p->stop), n_samples(task_p->n_samples)
       , n_samples_r(((n_samples + UNIFRAC_BLOCK-1)/UNIFRAC_BLOCK)*UNIFRAC_BLOCK) // round up
-      , buf((dm_stripes[start_idx]==NULL) ? NULL : new TFloat[n_samples_r*(task_p->stop-start_idx)]) // dm_stripes could be null, in which case keep it null
+      , buf((dm_stripes[start_idx]==NULL) ? NULL : new TFloat[n_samples_r*(stop_idx-start_idx)]) // dm_stripes could be null, in which case keep it null
       {
         TFloat* const ibuf = buf;
         if (ibuf != NULL) {
 #ifdef _OPENACC
-          const uint64_t bufels = n_samples_r * (task_p->stop-start_idx);
+          const uint64_t bufels = n_samples_r * (stop_idx-start_idx);
 #endif
-          for(unsigned int stripe=start_idx; stripe < task_p->stop; stripe++) {
+          for(unsigned int stripe=start_idx; stripe < stop_idx; stripe++) {
              double * dm_stripe = dm_stripes[stripe];
              TFloat * buf_stripe = this->operator[](stripe);
              for(unsigned int j=0; j<n_samples; j++) {
@@ -86,6 +87,9 @@ namespace SUCMP_NM {
         }
       }
 
+      UnifracTaskVector<TFloat>(const UnifracTaskVector<TFloat>& ) = delete;
+      UnifracTaskVector<TFloat>& operator= (const UnifracTaskVector<TFloat>&) = delete;
+
       TFloat * operator[](unsigned int idx) { return buf+((idx-start_idx)*n_samples_r);}
       const TFloat * operator[](unsigned int idx) const { return buf+((idx-start_idx)*n_samples_r);}
 
@@ -95,10 +99,10 @@ namespace SUCMP_NM {
         TFloat* const ibuf = buf;
         if (ibuf != NULL) {
 #ifdef _OPENACC
-          const uint64_t bufels = n_samples_r * (task_p->stop-start_idx); 
+          const uint64_t bufels = n_samples_r * (stop_idx-start_idx); 
 #pragma acc exit data copyout(ibuf[:bufels])
 #endif    
-          for(unsigned int stripe=start_idx; stripe < task_p->stop; stripe++) {
+          for(unsigned int stripe=start_idx; stripe < stop_idx; stripe++) {
              double * dm_stripe = dm_stripes[stripe];
              TFloat * buf_stripe = this->operator[](stripe);
              for(unsigned int j=0; j<n_samples; j++) {
@@ -147,6 +151,9 @@ namespace SUCMP_NM {
         : dm_stripes(baseObj.dm_stripes), dm_stripes_total(baseObj.dm_stripes_total), task_p(baseObj.task_p) {}
         */
 
+        UnifracTaskBase<TFloat,TEmb>(const UnifracTaskBase<TFloat,TEmb>& ) = delete;
+        UnifracTaskBase<TFloat,TEmb>& operator= (const UnifracTaskBase<TFloat,TEmb>&) = delete;
+
         virtual ~UnifracTaskBase()
         {
 #ifdef _OPENACC
@@ -316,6 +323,8 @@ namespace SUCMP_NM {
         , embedded_proportions(_embedded_proportions), max_embs(_max_embs) {}
         */
       
+       UnifracTask<TFloat,TEmb>(const UnifracTask<TFloat,TEmb>& ) = delete;
+       UnifracTask<TFloat,TEmb>& operator= (const UnifracTask<TFloat,TEmb>&) = delete;
 
        virtual ~UnifracTask() {}
 
@@ -346,6 +355,9 @@ namespace SUCMP_NM {
 #pragma acc enter data create(zcheck[:n_samples],sums[:n_samples])
         }
 
+        UnifracUnnormalizedWeightedTask<TFloat>(const UnifracUnnormalizedWeightedTask<TFloat>& ) = delete;
+        UnifracUnnormalizedWeightedTask<TFloat>& operator= (const UnifracUnnormalizedWeightedTask<TFloat>&) = delete;
+
         virtual ~UnifracUnnormalizedWeightedTask()
         {
 #ifdef _OPENACC
@@ -381,6 +393,9 @@ namespace SUCMP_NM {
 #pragma acc enter data create(zcheck[:n_samples],sums[:n_samples])
         }
 
+        UnifracNormalizedWeightedTask<TFloat>(const UnifracNormalizedWeightedTask<TFloat>& ) = delete;
+        UnifracNormalizedWeightedTask<TFloat>& operator= (const UnifracNormalizedWeightedTask<TFloat>&) = delete;
+
         virtual ~UnifracNormalizedWeightedTask()
         {
 #ifdef _OPENACC
@@ -421,6 +436,9 @@ namespace SUCMP_NM {
 #pragma acc enter data create(zcheck[:n_samples],stripe_sums[:n_samples],sums[:bsize])
         }
 
+        UnifracUnweightedTask<TFloat>(const UnifracUnweightedTask<TFloat>& ) = delete;
+        UnifracUnweightedTask<TFloat>& operator= (const UnifracUnweightedTask<TFloat>&) = delete;
+
         virtual ~UnifracUnweightedTask()
         {
 #ifdef _OPENACC
@@ -450,6 +468,9 @@ namespace SUCMP_NM {
         UnifracGeneralizedTask(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total, unsigned int _max_embs, const su::task_parameters* _task_p)
         : UnifracTask<TFloat,TFloat>(_dm_stripes,_dm_stripes_total,_max_embs,_task_p) {}
 
+        UnifracGeneralizedTask<TFloat>(const UnifracGeneralizedTask<TFloat>& ) = delete;
+        UnifracGeneralizedTask<TFloat>& operator= (const UnifracGeneralizedTask<TFloat>&) = delete;
+
         virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
 
         void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
@@ -516,6 +537,8 @@ namespace SUCMP_NM {
         , embedded_proportions(_embedded_proportions), embedded_counts(initialize_embedded<TFloat>()), sample_total_counts(_sample_total_counts), max_embs(_max_embs) {}
         */
 
+       UnifracVawTask<TFloat,TEmb>(const UnifracVawTask<TFloat,TEmb>& ) = delete;
+       UnifracVawTask<TFloat,TEmb>& operator= (const UnifracVawTask<TFloat,TEmb>&) = delete;
 
        virtual ~UnifracVawTask() 
        {
@@ -554,6 +577,9 @@ namespace SUCMP_NM {
                     unsigned int _max_embs, const su::task_parameters* _task_p)
         : UnifracVawTask<TFloat,TFloat>(_dm_stripes,_dm_stripes_total,_sample_total_counts,_max_embs,_task_p) {}
 
+        UnifracVawUnnormalizedWeightedTask<TFloat>(const UnifracVawUnnormalizedWeightedTask<TFloat>& ) = delete;
+        UnifracVawUnnormalizedWeightedTask<TFloat>& operator= (const UnifracVawUnnormalizedWeightedTask<TFloat>&) = delete;
+
         virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
 
         void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
@@ -566,6 +592,9 @@ namespace SUCMP_NM {
                     unsigned int _max_embs, const su::task_parameters* _task_p)
         : UnifracVawTask<TFloat,TFloat>(_dm_stripes,_dm_stripes_total,_sample_total_counts,_max_embs,_task_p) {}
 
+        UnifracVawNormalizedWeightedTask<TFloat>(const UnifracVawNormalizedWeightedTask<TFloat>& ) = delete;
+        UnifracVawNormalizedWeightedTask<TFloat>& operator= (const UnifracVawNormalizedWeightedTask<TFloat>&) = delete;
+
         virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
 
         void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
@@ -578,6 +607,9 @@ namespace SUCMP_NM {
                     unsigned int _max_embs, const su::task_parameters* _task_p)
         : UnifracVawTask<TFloat,uint32_t>(_dm_stripes,_dm_stripes_total,_sample_total_counts,_max_embs,_task_p) {}
 
+        UnifracVawUnweightedTask<TFloat>(const UnifracVawUnweightedTask<TFloat>& ) = delete;
+        UnifracVawUnweightedTask<TFloat>& operator= (const UnifracVawUnweightedTask<TFloat>&) = delete;
+
         virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
 
         void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
@@ -590,6 +622,9 @@ namespace SUCMP_NM {
                     unsigned int _max_embs, const su::task_parameters* _task_p)
         : UnifracVawTask<TFloat,TFloat>(_dm_stripes,_dm_stripes_total,_sample_total_counts,_max_embs,_task_p) {}
 
+        UnifracVawGeneralizedTask<TFloat>(const UnifracVawGeneralizedTask<TFloat>& ) = delete;
+        UnifracVawGeneralizedTask<TFloat>& operator= (const UnifracVawGeneralizedTask<TFloat>&) = delete;
+
         virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
 
         void _run(unsigned int filled_embs, const TFloat * __restrict__ length);



View it on GitLab: https://salsa.debian.org/med-team/unifrac-tools/-/commit/b18dc326dcde37977cd13fa02d215ab1b5f94740

-- 
View it on GitLab: https://salsa.debian.org/med-team/unifrac-tools/-/commit/b18dc326dcde37977cd13fa02d215ab1b5f94740
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230103/6629946f/attachment-0001.htm>


More information about the debian-med-commit mailing list