[med-svn] [gmap] 08/11: Imported Upstream version 2015-07-23
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Sat Aug 22 06:25:59 UTC 2015
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 1f0c49c841cbde61523d576a683b91f91c9846d7
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Fri Aug 21 20:22:34 2015 +0200
Imported Upstream version 2015-07-23
---
ChangeLog | 117 ++
Makefile.in | 1 +
README | 25 +-
VERSION | 2 +-
acinclude.m4 | 1 +
config/shm-flags.m4 | 18 +
configure | 93 +-
configure.ac | 1 +
mpi/Makefile.in | 1 +
src/Makefile.am | 4 +-
src/Makefile.in | 5 +-
src/access.c | 20 +-
src/bigendian.c | 528 ++----
src/bigendian.h | 28 +-
src/bitpack64-access.c | 712 ++++----
src/bitpack64-read.c | 4234 +++++++++++++++++++++++++++++------------------
src/bitpack64-readtwo.c | 4128 +++++++++++++++++++++++++--------------------
src/bytecoding.c | 154 +-
src/bytecoding.h | 5 +-
src/compress.c | 315 ++--
src/compress.h | 8 +-
src/config.h.in | 3 +
src/dynprog_genome.c | 50 +-
src/genome-write.c | 6 +-
src/genome.c | 124 +-
src/genome.h | 5 +-
src/genome128_hr.c | 2188 +++++++++++++-----------
src/get-genome.c | 73 +-
src/gmap.c | 64 +-
src/gsnap.c | 102 +-
src/iit-read-univ.c | 28 +-
src/indexdb.c | 61 +-
src/indexdb_hr.c | 18 +-
src/mode.h | 4 +-
src/oligoindex_hr.c | 2448 ++++++++++++++++++---------
src/sarray-read.c | 261 ++-
src/sarray-write.c | 67 +-
src/sequence.c | 42 +-
src/sequence.h | 6 +-
src/snpindex.c | 64 +-
src/splice.c | 36 +-
src/stage1hr.c | 3444 ++++++++++++++++++--------------------
src/stage3hr.c | 14 +-
src/substring.c | 95 +-
src/types.h | 9 +-
src/uniqscan.c | 56 +-
src/univinterval.h | 6 +-
tests/Makefile.in | 1 +
util/Makefile.in | 1 +
49 files changed, 11504 insertions(+), 8172 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 62843c5..bcab131 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,122 @@
+2015-07-23 twu
+
+ * VERSION: Updated version number
+
+ * stage1hr.c: Removed an abort command from debugging
+
+ * sarray-read.c: Using new interface to Bytecoding lcp_next function.
+ Commented out code that is not used when SUBDIVIDE_ENDS is not defined.
+
+ * bytecoding.c, bytecoding.h: Call to lcp_next now returns child_next
+
+ * VERSION: Updated version number
+
+ * dynprog_genome.c: Fixed boundaries that led to negative coordinates for
+ splice site candidates.
+
+ * stage1hr.c: Removed unused variables
+
+ * stage1hr.c: Removed allvalidp as parameter to align_end and align_pair.
+
+2015-07-22 twu
+
+ * stage1hr.c: Setting spanningsetp and completesetp to false if querylength
+ < min_kmer_readlength
+
+ * stage1hr.c: Removed restriction on min_readlength. Running only suffix
+ array, if possible, if reads are too short.
+
+ * access.c: Changed user message
+
+ * sarray-write.c: Changing plcp[n] to be 0 instead of -1
+
+ * sarray-read.c: Improved debugging results
+
+ * access.c: Printing user message if shmem fails
+
+2015-07-17 twu
+
+ * get-genome.c, sequence.c, sequence.h: Added flags for --stream-chars and
+ --stream-ints
+
+2015-06-26 twu
+
+ * 2015-statgen, Makefile.gsnaptoo.am, algorithm.tex, discussion.tex,
+ features.tex, introduction.tex, trunk, util: Modified mergeinfo
+
+ * config.site.rescomp.tst: Updated version
+
+ * index.html: Updated for version 2015-06-23
+
+ * archive.html: Updated for version 2014-12-31
+
+ * README: Removed references to Goby
+
+ * access.c, bigendian.c, bigendian.h, bitpack64-access.c, bitpack64-read.c,
+ bitpack64-readtwo.c, bytecoding.c, compress.c, compress.h, genome-write.c,
+ genome.c, genome.h, genome128_hr.c, iit-read-univ.c, indexdb.c,
+ indexdb_hr.c, sarray-read.c, sarray-write.c, snpindex.c, src, types.h,
+ univinterval.h: Merged revisions 167282 through 168383 from
+ branches/2015-06-10-bigendian to support bigendian architectures
+
+ * Makefile.dna.am, Makefile.util.am: Added instructions for check-bigendian
+
+2015-06-24 twu
+
+ * VERSION, config.site.rescomp.tst: Updated version number
+
+ * algorithm.tex, biblio.bib, discussion.tex, features.tex, introduction.tex,
+ toplevel.tex: Final version
+
+ * stage1hr.c: Added comments
+
+ * gmap.c: Removed message about different batch levels
+
+ * gsnap.c: Added option --master-is-worker for MPI version
+
+ * access.c: Using malloc whenever shmget fails
+
+2015-06-15 twu
+
+ * stage1hr.c: Removed extra #endif statements
+
+ * 2015-statgen, Ambiguous-splicing.eps, Hierarchical-GMAP.eps,
+ Large-hash-table.eps, Makefile.gsnaptoo.am, Overlapping-alignment.eps,
+ VERSION, biblio.bib, config.site.rescomp.tst, toplevel.tex, trunk, util:
+ Updated version number
+
+ * stage1hr.c: Fixed indentation
+
+ * genome.c, genome128_hr.c, gmap.c, gsnap.c, indexdb.c, mode.h,
+ sarray-read.c, src, stage1hr.c, substring.c, uniqscan.c: Merged revisions
+ 165630 through 167691 from branches/2015-05-13-ttoc to implement ttoc mode
+
+ * splice.c: Applied revision 167580 from releases/public-2014-12-17. In
+ group_by_segmenti_aux and group_by_segmentj_aux, checking plusp for each
+ individual hit in deciding whether to group donor or acceptor.
+
+ * bitpack64-readtwo.c: Added debugging statements
+
+ * sarray-read.c: Defining a variable for debugging
+
+ * oligoindex_hr.c: Defining reverse_nt for machines without SSE4.1
+
2015-06-11 twu
+ * stage3hr.c: Changed occurrences of Uintlist_next to Uint8list_next for
+ LARGE_GENOMES
+
+ * oligoindex_hr.c: Providing alternative to _mm_extract_epi32 for machines
+ without SSE4.1
+
+ * access.c, acinclude.m4, configure.ac, shm-flags.m4: Including check for
+ SHM_NORESERVE
+
+ * Makefile.gsnaptoo.am: Removed -lrt
+
+ * sarray-read.c: Initializing chromosome values to be those for chrnum 1 to
+ handle left == 0
+
* VERSION, index.html: Updated version number
* sarray-write.c: Removing rankfile
diff --git a/Makefile.in b/Makefile.in
index c92bdf8..651b3b2 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -51,6 +51,7 @@ am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/mmap-flags.m4 \
$(top_srcdir)/config/acx_mmap_fixed.m4 \
$(top_srcdir)/config/acx_mmap_variable.m4 \
+ $(top_srcdir)/config/shm-flags.m4 \
$(top_srcdir)/config/ax_mpi.m4 \
$(top_srcdir)/config/acx_pthread.m4 \
$(top_srcdir)/config/builtin-popcount.m4 \
diff --git a/README b/README
index 9e7ee80..fac3a4c 100644
--- a/README
+++ b/README
@@ -89,15 +89,6 @@ However, to disable this feature, you can add "--disable-bzlib" to the
"disable_bzlib".
-Note 6: GSNAP optionally supports the Goby input and output file
-formats. To implement this functionality, you need to obtain and
-compile the libraries from http://campagnelab.org/software/goby. If
-the resulting header files are located in /path/to/goby/include and
-the library files are in /path/to/goby/lib, you can then add the flag
-"--with-goby=/path/to/goby" to your ./configure command or edit your
-config.site file to have this directory as the value for "with_goby".
-
-
2. Possible issues during compilation
======================================
@@ -122,13 +113,13 @@ instructions work, but popcnt is so widely implemented that they
generally do not cause any problems.)
In that case, you may need to compile your program for the lowest
-common denominator by disabling SSE2 instructions by providing
---disable-sse4.1 or --disable-sse2 to ./configure as necessary.
-Alternatively, your computer cluster may have the ability to detect
-the capabilities of each computer when it receives a job. Then, you
-may want to create different compiled versions of GMAP and GSNAP, and
-call the appropriate binary for that particular job. You will have to
-work with your system administrator if you want to accomplish this.
+common denominator by providing --disable-avx, --disable-sse4.1, or
+--disable-sse2 to ./configure as necessary. Alternatively, your
+computer cluster may have the ability to detect the capabilities of
+each computer when it receives a job. Then, you may want to create
+different compiled versions of GMAP and GSNAP, and call the
+appropriate binary for that particular job. You will have to work
+with your system administrator if you want to accomplish this.
Compiler issue 2. The most recent versions of GSNAP (starting with
@@ -288,7 +279,7 @@ all other chromosomes in numeric/alphabetical order. If you don't
want this sort, provide the "-s none" flag to gmap_build. Other sort
options besides "none" and "chrom" are "alpha" and "numeric-alpha".
-You can type "gmap_setup --help" to see the full set of options. We
+You can type "gmap_build --help" to see the full set of options. We
discuss some specific situations below.
diff --git a/VERSION b/VERSION
index 00ee667..b93269f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2015-06-10
\ No newline at end of file
+2015-07-23
\ No newline at end of file
diff --git a/acinclude.m4 b/acinclude.m4
index 3bbc4c4..602b9cd 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -7,6 +7,7 @@ m4_include([config/madvise-flags.m4])
m4_include([config/mmap-flags.m4])
m4_include([config/acx_mmap_fixed.m4])
m4_include([config/acx_mmap_variable.m4])
+m4_include([config/shm-flags.m4])
m4_include([config/ax_mpi.m4])
m4_include([config/acx_pthread.m4])
diff --git a/config/shm-flags.m4 b/config/shm-flags.m4
new file mode 100644
index 0000000..04297b0
--- /dev/null
+++ b/config/shm-flags.m4
@@ -0,0 +1,18 @@
+
+AC_DEFUN([ACX_SHM_FLAGS], [
+AC_LANG_SAVE
+AC_LANG(C)
+
+AC_MSG_CHECKING(for SHM_NORESERVE in shmget)
+AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM([[#include <sys/ipc.h>
+#include <sys/shm.h>]],
+ [[int flags = SHM_NORESERVE;]])],
+ [AC_MSG_RESULT(yes)
+ AC_DEFINE([HAVE_SHM_NORESERVE],[1],[Define to 1 if SHM_NORESERVE available for shmget.])],
+ [AC_MSG_RESULT(no)])
+
+AC_LANG_RESTORE
+])
+
+
diff --git a/configure b/configure
index 9689771..9e9988e 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.63 for gmap 2015-06-10.
+# Generated by GNU Autoconf 2.63 for gmap 2015-07-23.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2015-06-10'
-PACKAGE_STRING='gmap 2015-06-10'
+PACKAGE_VERSION='2015-07-23'
+PACKAGE_STRING='gmap 2015-07-23'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
ac_unique_file="src/gmap.c"
@@ -1513,7 +1513,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2015-06-10 to adapt to many kinds of systems.
+\`configure' configures gmap 2015-07-23 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1584,7 +1584,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2015-06-10:";;
+ short | recursive ) echo "Configuration of gmap 2015-07-23:";;
esac
cat <<\_ACEOF
@@ -1721,7 +1721,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2015-06-10
+gmap configure 2015-07-23
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1735,7 +1735,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2015-06-10, which was
+It was created by gmap $as_me 2015-07-23, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
@@ -2105,8 +2105,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:$LINENO: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:$LINENO: result: 2015-06-10" >&5
-$as_echo "2015-06-10" >&6; }
+{ $as_echo "$as_me:$LINENO: result: 2015-07-23" >&5
+$as_echo "2015-07-23" >&6; }
### Read defaults
@@ -4172,7 +4172,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2015-06-10'
+ VERSION='2015-07-23'
cat >>confdefs.h <<_ACEOF
@@ -18793,6 +18793,75 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+{ $as_echo "$as_me:$LINENO: checking for SHM_NORESERVE in shmget" >&5
+$as_echo_n "checking for SHM_NORESERVE in shmget... " >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/ipc.h>
+#include <sys/shm.h>
+int
+main ()
+{
+int flags = SHM_NORESERVE;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ { $as_echo "$as_me:$LINENO: result: yes" >&5
+$as_echo "yes" >&6; }
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_SHM_NORESERVE 1
+_ACEOF
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
@@ -26522,7 +26591,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2015-06-10, which was
+This file was extended by gmap $as_me 2015-07-23, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -26585,7 +26654,7 @@ Report bugs to <bug-autoconf at gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-gmap config.status 2015-06-10
+gmap config.status 2015-07-23
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/configure.ac b/configure.ac
index 44b328d..153c7df 100644
--- a/configure.ac
+++ b/configure.ac
@@ -298,6 +298,7 @@ fi
ACX_MMAP_FLAGS
ACX_MADVISE_FLAGS
+ACX_SHM_FLAGS
AC_CHECK_FUNCS([ceil floor index log madvise memcpy memmove memset munmap pow rint stat64 strtoul sysconf sysctl sigaction \
shmget shmctl shmat shmdt semget semctl semop])
diff --git a/mpi/Makefile.in b/mpi/Makefile.in
index 941269f..eaafa03 100644
--- a/mpi/Makefile.in
+++ b/mpi/Makefile.in
@@ -49,6 +49,7 @@ am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/mmap-flags.m4 \
$(top_srcdir)/config/acx_mmap_fixed.m4 \
$(top_srcdir)/config/acx_mmap_variable.m4 \
+ $(top_srcdir)/config/shm-flags.m4 \
$(top_srcdir)/config/ax_mpi.m4 \
$(top_srcdir)/config/acx_pthread.m4 \
$(top_srcdir)/config/builtin-popcount.m4 \
diff --git a/src/Makefile.am b/src/Makefile.am
index 88b6c45..836fbff 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -150,12 +150,12 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
# Note: dist_ commands get read by bootstrap, and don't follow the flags
-# -lrt is needed for shm_open
+# Previously included -lrt for shm_open, but we are not calling that
gsnap_CC = $(PTHREAD_CC)
gsnap_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1
gsnap_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
-gsnap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) -lrt
+gsnap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_SOURCES = $(GSNAP_FILES)
diff --git a/src/Makefile.in b/src/Makefile.in
index 55726b0..708e7f3 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -54,6 +54,7 @@ am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/mmap-flags.m4 \
$(top_srcdir)/config/acx_mmap_fixed.m4 \
$(top_srcdir)/config/acx_mmap_variable.m4 \
+ $(top_srcdir)/config/shm-flags.m4 \
$(top_srcdir)/config/ax_mpi.m4 \
$(top_srcdir)/config/acx_pthread.m4 \
$(top_srcdir)/config/builtin-popcount.m4 \
@@ -879,11 +880,11 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
# Note: dist_ commands get read by bootstrap, and don't follow the flags
-# -lrt is needed for shm_open
+# Previously included -lrt for shm_open, but we are not calling that
gsnap_CC = $(PTHREAD_CC)
gsnap_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) $(SIMD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_READLENGTH=$(MAX_READLENGTH) -DGSNAP=1
gsnap_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
-gsnap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) -lrt
+gsnap_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
dist_gsnap_SOURCES = $(GSNAP_FILES)
GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
except.c except.h assert.c assert.h mem.c mem.h \
diff --git a/src/access.c b/src/access.c
index 9be6d68..e819902 100644
--- a/src/access.c
+++ b/src/access.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: access.c 165967 2015-05-20 00:15:27Z twu $";
+static char rcsid[] = "$Id: access.c 170327 2015-07-22 17:50:11Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -592,7 +592,11 @@ shmem_attach (int *shmid, char *filename, off_t filesize, size_t eltsize) {
others wait. They will be woken up when the semaphore is
removed. */
- if ((*shmid = shmget(key,filesize,IPC_CREAT | IPC_EXCL | SHM_NORESERVE | 0666)) != -1) {
+ if ((*shmid = shmget(key,filesize,IPC_CREAT | IPC_EXCL |
+#ifdef HAVE_SHM_NORESERVE
+ SHM_NORESERVE |
+#endif
+ 0666)) != -1) {
/* Created new shared memory */
if ((memory = shmat(*shmid,NULL,0)) == (void *) -1) {
fprintf(stderr,"Error with shmat. Error %d: %s\n",errno,strerror(errno));
@@ -614,8 +618,8 @@ shmem_attach (int *shmid, char *filename, off_t filesize, size_t eltsize) {
}
} else {
- fprintf(stderr,"Error with shmget. Error %d: %s\n",errno,strerror(errno));
- abort();
+ fprintf(stderr,"Using malloc instead of shmget for file %s\n",filename);
+ memory = (void *) NULL;
}
/* The process that proceeded removes the semaphore here, allowing
@@ -677,7 +681,13 @@ Access_allocate (int *shmid, size_t *len, double *seconds, char *filename, size_
MPI_Win_allocate_shared(*len,/*disp_unit*/1,MPI_INFO_NULL,comm,&memory,&win);
MPI_Win_free(&win);
#else
- memory = shmem_attach(&(*shmid),filename,/*filesize*/*len,eltsize);
+ if ((memory = shmem_attach(&(*shmid),filename,/*filesize*/*len,eltsize)) == NULL) {
+ fprintf(stderr,"shm_attach not working on file %s, so using malloc instead on %lu bytes\n",
+ filename,*len);
+ *shmid = 0;
+ memory = (void *) MALLOC(*len);
+ copy_memory_from_file(memory,filename,/*filesize*/*len,eltsize);
+ }
#endif
} else {
*shmid = 0;
diff --git a/src/bigendian.c b/src/bigendian.c
index 886de05..f010f94 100644
--- a/src/bigendian.c
+++ b/src/bigendian.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bigendian.c 99737 2013-06-27 19:33:03Z twu $";
+static char rcsid[] = "$Id: bigendian.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -7,22 +7,21 @@ static char rcsid[] = "$Id: bigendian.c 99737 2013-06-27 19:33:03Z twu $";
#include <unistd.h> /* For read() */
-/*************************************************************************
- * OUTPUT_BIGENDIAN provided to test bigendian code on a littleendian
- * machine. To use, compile all programs with WORDS_BIGENDIAN defined
- * in config.h and define OUTPUT_BIGENDIAN here.
- ************************************************************************/
+/* Same as Littleendian_write_char */
+void
+Bigendian_write_char (unsigned char value, int fd) {
+ unsigned char buf[1];
+
+ buf[0] = value;
+ write(fd,buf,1);
+
+ return;
+}
/************************************************************************
* Int
************************************************************************/
-#ifdef OUTPUT_BIGENDIAN
-int
-Bigendian_convert_int (int littleendian) {
- return littleendian;
-}
-#else
int
Bigendian_convert_int (int littleendian) {
int bigendian;
@@ -37,10 +36,8 @@ Bigendian_convert_int (int littleendian) {
return bigendian;
}
-#endif
-#ifdef OUTPUT_BIGENDIAN
size_t
Bigendian_fwrite_int (int value, FILE *fp) {
unsigned char buf[4];
@@ -56,25 +53,8 @@ Bigendian_fwrite_int (int value, FILE *fp) {
return 1;
}
}
-#else
-size_t
-Bigendian_fwrite_int (int value, FILE *fp) {
- unsigned char buf[4];
- buf[0] = (unsigned char) (value & 0xff);
- buf[1] = (unsigned char) ((value >>= 8) & 0xff);
- buf[2] = (unsigned char) ((value >>= 8) & 0xff);
- buf[3] = (unsigned char) ((value >>= 8) & 0xff);
- if (fwrite(buf,sizeof(unsigned char),4,fp) == 0) {
- /* Should set error indicator for stream and set errno */
- return 0;
- } else {
- return 1;
- }
-}
-#endif
-#ifdef OUTPUT_BIGENDIAN
size_t
Bigendian_fwrite_ints (int *array, int n, FILE *fp) {
unsigned char buf[4];
@@ -93,28 +73,8 @@ Bigendian_fwrite_ints (int *array, int n, FILE *fp) {
}
return n;
}
-#else
-size_t
-Bigendian_fwrite_ints (int *array, int n, FILE *fp) {
- unsigned char buf[4];
- int value, i;
- for (i = 0; i < n; i++) {
- value = array[i];
- buf[0] = (unsigned char) (value & 0xff);
- buf[1] = (unsigned char) ((value >>= 8) & 0xff);
- buf[2] = (unsigned char) ((value >>= 8) & 0xff);
- buf[3] = (unsigned char) ((value >>= 8) & 0xff);
- if (fwrite(buf,sizeof(unsigned char),4,fp) == 0) {
- /* Should set error indicator for stream and set errno */
- return 0;
- }
- }
- return n;
-}
-#endif
-#ifdef OUTPUT_BIGENDIAN
size_t
Bigendian_fread_int (int *value, FILE *fp) {
unsigned char buf[4];
@@ -123,87 +83,49 @@ Bigendian_fread_int (int *value, FILE *fp) {
/* Should set error indicator for stream and set errno */
return 0;
} else {
- *value = (buf[0] & 0xff);
+#if 0
+ *value = buf[0];
*value <<= 8;
- *value |= (buf[1] & 0xff);
+ *value |= buf[1];
*value <<= 8;
- *value |= (buf[2] & 0xff);
+ *value |= buf[2];
*value <<= 8;
- *value |= (buf[3] & 0xff);
- return 1;
- }
-}
+ *value |= buf[3];
#else
-size_t
-Bigendian_fread_int (int *value, FILE *fp) {
- unsigned char buf[4];
-
- if (fread(buf,sizeof(unsigned char),4,fp) < 4) {
- /* Should set error indicator for stream and set errno */
- return 0;
- } else {
-#if 0
- fprintf(stderr,"Reading %2X %2X %2X %2X, and using last as most sig\n",buf[0],buf[1],buf[2],buf[3]);
+ *value = ((int) buf[0] << 24) | ((int) buf[1] << 16) | ((int) buf[2] << 8) | (int) buf[3];
#endif
- *value = (buf[3] & 0xff);
- *value <<= 8;
- *value |= (buf[2] & 0xff);
- *value <<= 8;
- *value |= (buf[1] & 0xff);
- *value <<= 8;
- *value |= (buf[0] & 0xff);
return 1;
}
}
-#endif
-#ifdef OUTPUT_BIGENDIAN
-size_t
-Bigendian_fread_ints (int *array, int n, FILE *fp) {
- unsigned char buf[4];
- int value, i;
- for (i = 0; i < n; i++) {
- if (fread(buf,sizeof(unsigned char),4,fp) < 4) {
- /* Should set error indicator for stream and set errno */
- return 0;
- } else {
- value = (buf[0] & 0xff);
- value <<= 8;
- value |= (buf[1] & 0xff);
- value <<= 8;
- value |= (buf[2] & 0xff);
- value <<= 8;
- value |= (buf[3] & 0xff);
- array[i] = value;
- }
- }
- return n;
-}
-#else
size_t
Bigendian_fread_ints (int *array, int n, FILE *fp) {
unsigned char buf[4];
- int value, i;
+ /* int value; */
+ int i;
for (i = 0; i < n; i++) {
if (fread(buf,sizeof(unsigned char),4,fp) < 4) {
/* Should set error indicator for stream and set errno */
return 0;
} else {
- value = (buf[3] & 0xff);
+#if 0
+ value = buf[0];
value <<= 8;
- value |= (buf[2] & 0xff);
+ value |= buf[1];
value <<= 8;
- value |= (buf[1] & 0xff);
+ value |= buf[2];
value <<= 8;
- value |= (buf[0] & 0xff);
+ value |= buf[3];
array[i] = value;
+#else
+ array[i] = ((int) buf[0] << 24) | ((int) buf[1] << 16) | ((int) buf[2] << 8) | (int) buf[3];
+#endif
}
}
return n;
}
-#endif
/************************************************************************
@@ -226,7 +148,6 @@ Bigendian_convert_uint (unsigned int littleendian) {
}
-#ifdef OUTPUT_BIGENDIAN
size_t
Bigendian_fwrite_uint (unsigned int value, FILE *fp) {
unsigned char buf[4];
@@ -242,26 +163,8 @@ Bigendian_fwrite_uint (unsigned int value, FILE *fp) {
return 1;
}
}
-#else
-size_t
-Bigendian_fwrite_uint (unsigned int value, FILE *fp) {
- unsigned char buf[4];
- buf[0] = (unsigned char) (value & 0xff);
- buf[1] = (unsigned char) ((value >>= 8) & 0xff);
- buf[2] = (unsigned char) ((value >>= 8) & 0xff);
- buf[3] = (unsigned char) ((value >>= 8) & 0xff);
- if (fwrite(buf,sizeof(unsigned char),4,fp) == 0) {
- /* Should set error indicator for stream and set errno */
- return 0;
- } else {
- return 1;
- }
-}
-#endif
-
-#ifdef OUTPUT_BIGENDIAN
void
Bigendian_write_uint (unsigned int value, int fd) {
unsigned char buf[4];
@@ -273,20 +176,8 @@ Bigendian_write_uint (unsigned int value, int fd) {
write(fd,buf,4);
return;
}
-#else
-void
-Bigendian_write_uint (unsigned int value, int fd) {
- unsigned char buf[4];
- buf[0] = (unsigned char) (value & 0xff);
- buf[1] = (unsigned char) ((value >>= 8) & 0xff);
- buf[2] = (unsigned char) ((value >>= 8) & 0xff);
- buf[3] = (unsigned char) ((value >>= 8) & 0xff);
- write(fd,buf,4);
-}
-#endif
-#ifdef OUTPUT_BIGENDIAN
size_t
Bigendian_fwrite_uints (unsigned int *array, int n, FILE *fp) {
unsigned char buf[4];
@@ -306,30 +197,8 @@ Bigendian_fwrite_uints (unsigned int *array, int n, FILE *fp) {
}
return n;
}
-#else
-size_t
-Bigendian_fwrite_uints (unsigned int *array, int n, FILE *fp) {
- unsigned char buf[4];
- unsigned int value;
- int i;
-
- for (i = 0; i < n; i++) {
- value = array[i];
- buf[0] = (unsigned char) (value & 0xff);
- buf[1] = (unsigned char) ((value >>= 8) & 0xff);
- buf[2] = (unsigned char) ((value >>= 8) & 0xff);
- buf[3] = (unsigned char) ((value >>= 8) & 0xff);
- if (fwrite(buf,sizeof(unsigned char),4,fp) == 0) {
- /* Should set error indicator for stream and set errno */
- return 0;
- }
- }
- return n;
-}
-#endif
-#ifdef OUTPUT_BIGENDIAN
size_t
Bigendian_fread_uint (unsigned int *value, FILE *fp) {
unsigned char buf[4];
@@ -338,66 +207,26 @@ Bigendian_fread_uint (unsigned int *value, FILE *fp) {
/* Should set error indicator for stream and set errno */
return 0;
} else {
- *value = (buf[0] & 0xff);
+#if 0
+ *value = buf[0];
*value <<= 8;
- *value |= (buf[1] & 0xff);
+ *value |= buf[1];
*value <<= 8;
- *value |= (buf[2] & 0xff);
+ *value |= buf[2];
*value <<= 8;
- *value |= (buf[3] & 0xff);
- return 1;
- }
-}
+ *value |= buf[3];
#else
-size_t
-Bigendian_fread_uint (unsigned int *value, FILE *fp) {
- unsigned char buf[4];
-
- if (fread(buf,sizeof(unsigned char),4,fp) < 4) {
- /* Should set error indicator for stream and set errno */
- return 0;
- } else {
- *value = (buf[3] & 0xff);
- *value <<= 8;
- *value |= (buf[2] & 0xff);
- *value <<= 8;
- *value |= (buf[1] & 0xff);
- *value <<= 8;
- *value |= (buf[0] & 0xff);
+ *value = ((unsigned int) buf[0] << 24) | ((unsigned int) buf[1] << 16) | ((unsigned int) buf[2] << 8) | (unsigned int) buf[3];
+#endif
return 1;
}
}
-#endif
-#ifdef OUTPUT_BIGENDIAN
-size_t
-Bigendian_fread_uints (unsigned int *array, int n, FILE *fp) {
- unsigned char buf[4];
- unsigned int value;
- int i;
- for (i = 0; i < n; i++) {
- if (fread(buf,sizeof(unsigned char),4,fp) < 4) {
- /* Should set error indicator for stream and set errno */
- return 0;
- } else {
- value = (buf[0] & 0xff);
- value <<= 8;
- value |= (buf[1] & 0xff);
- value <<= 8;
- value |= (buf[2] & 0xff);
- value <<= 8;
- value |= (buf[3] & 0xff);
- array[i] = value;
- }
- }
- return n;
-}
-#else
size_t
Bigendian_fread_uints (unsigned int *array, int n, FILE *fp) {
unsigned char buf[4];
- unsigned int value;
+ /* unsigned int value; */
int i;
for (i = 0; i < n; i++) {
@@ -405,54 +234,43 @@ Bigendian_fread_uints (unsigned int *array, int n, FILE *fp) {
/* Should set error indicator for stream and set errno */
return 0;
} else {
- value = (buf[3] & 0xff);
+#if 0
+ value = buf[0];
value <<= 8;
- value |= (buf[2] & 0xff);
+ value |= buf[1];
value <<= 8;
- value |= (buf[1] & 0xff);
+ value |= buf[2];
value <<= 8;
- value |= (buf[0] & 0xff);
+ value |= buf[3];
array[i] = value;
+#else
+ array[i] = ((unsigned int) buf[0] << 24) | ((unsigned int) buf[1] << 16) | ((unsigned int) buf[2] << 8) | (unsigned int) buf[3];
+#endif
}
}
return n;
}
-#endif
-#ifdef OUTPUT_BIGENDIAN
unsigned int
Bigendian_fileio_read_uint (int fd) {
unsigned int value = 0U;
unsigned char buf[4];
read(fd,buf,4);
- value = (buf[0] & 0xff);
+#if 0
+ value = buf[0];
value <<= 8;
- value |= (buf[1] & 0xff);
+ value |= buf[1];
value <<= 8;
- value |= (buf[2] & 0xff);
+ value |= buf[2];
value <<= 8;
- value |= (buf[3] & 0xff);
- return value;
-}
+ value |= buf[3];
#else
-unsigned int
-Bigendian_fileio_read_uint (int fd) {
- unsigned int value = 0U;
- unsigned char buf[4];
-
- read(fd,buf,4);
- value = (buf[3] & 0xff);
- value <<= 8;
- value |= (buf[2] & 0xff);
- value <<= 8;
- value |= (buf[1] & 0xff);
- value <<= 8;
- value |= (buf[0] & 0xff);
+ value = ((unsigned int) buf[0] << 24) | ((unsigned int) buf[1] << 16) | ((unsigned int) buf[2] << 8) | (unsigned int) buf[3];
+#endif
return value;
}
-#endif
/************************************************************************
@@ -495,7 +313,6 @@ Bigendian_convert_uint8 (UINT8 littleendian) {
}
-#ifdef OUTPUT_BIGENDIAN
void
Bigendian_write_uint8 (UINT8 value, int fd) {
unsigned char buf[8];
@@ -511,26 +328,8 @@ Bigendian_write_uint8 (UINT8 value, int fd) {
write(fd,buf,8);
return;
}
-#else
-void
-Bigendian_write_uint8 (UINT8 value, int fd) {
- unsigned char buf[8];
-
- buf[0] = (unsigned char) (value & 0xff);
- buf[1] = (unsigned char) ((value >>= 8) & 0xff);
- buf[2] = (unsigned char) ((value >>= 8) & 0xff);
- buf[3] = (unsigned char) ((value >>= 8) & 0xff);
- buf[4] = (unsigned char) ((value >>= 8) & 0xff);
- buf[5] = (unsigned char) ((value >>= 8) & 0xff);
- buf[6] = (unsigned char) ((value >>= 8) & 0xff);
- buf[7] = (unsigned char) ((value >>= 8) & 0xff);
- write(fd,buf,8);
-}
-#endif
-
-#ifdef OUTPUT_BIGENDIAN
size_t
Bigendian_fwrite_uint8 (UINT8 value, FILE *fp) {
unsigned char buf[8];
@@ -550,29 +349,8 @@ Bigendian_fwrite_uint8 (UINT8 value, FILE *fp) {
return 1;
}
}
-#else
-size_t
-Bigendian_fwrite_uint8 (UINT8 value, FILE *fp) {
- unsigned char buf[8];
- buf[0] = value & 0xff;
- buf[1] = (value >>= 8) & 0xff;
- buf[2] = (value >>= 8) & 0xff;
- buf[3] = (value >>= 8) & 0xff;
- buf[4] = (value >>= 8) & 0xff;
- buf[5] = (value >>= 8) & 0xff;
- buf[6] = (value >>= 8) & 0xff;
- buf[7] = (value >>= 8) & 0xff;
- if (fwrite(buf,sizeof(unsigned char),8,fp) == 0) {
- /* Should set error indicator for stream and set errno */
- return 0;
- } else {
- return 1;
- }
-}
-#endif
-#ifdef OUTPUT_BIGENDIAN
size_t
Bigendian_fwrite_uint8s (UINT8 *array, int n, FILE *fp) {
unsigned char buf[8];
@@ -596,34 +374,8 @@ Bigendian_fwrite_uint8s (UINT8 *array, int n, FILE *fp) {
}
return n;
}
-#else
-size_t
-Bigendian_fwrite_uint8s (UINT8 *array, int n, FILE *fp) {
- unsigned char buf[8];
- UINT8 value;
- int i;
-
- for (i = 0; i < n; i++) {
- value = array[i];
- buf[0] = value & 0xff;
- buf[1] = (value >>= 8) & 0xff;
- buf[2] = (value >>= 8) & 0xff;
- buf[3] = (value >>= 8) & 0xff;
- buf[4] = (value >>= 8) & 0xff;
- buf[5] = (value >>= 8) & 0xff;
- buf[6] = (value >>= 8) & 0xff;
- buf[7] = (value >>= 8) & 0xff;
- if (fwrite(buf,sizeof(unsigned char),8,fp) == 0) {
- /* Should set error indicator for stream and set errno */
- return 0;
- }
- }
- return n;
-}
-#endif
-#ifdef OUTPUT_BIGENDIAN
size_t
Bigendian_fread_uint8 (UINT8 *value, FILE *fp) {
unsigned char buf[8];
@@ -632,55 +384,26 @@ Bigendian_fread_uint8 (UINT8 *value, FILE *fp) {
/* Should set error indicator for stream and set errno */
return 0;
} else {
- *value = (buf[0] & 0xff);
+ *value = (UINT8) buf[0];
*value <<= 8;
- *value |= (buf[1] & 0xff);
+ *value |= (UINT8) buf[1];
*value <<= 8;
- *value |= (buf[2] & 0xff);
+ *value |= (UINT8) buf[2];
*value <<= 8;
- *value |= (buf[3] & 0xff);
+ *value |= (UINT8) buf[3];
*value <<= 8;
- *value |= (buf[4] & 0xff);
+ *value |= (UINT8) buf[4];
*value <<= 8;
- *value |= (buf[5] & 0xff);
+ *value |= (UINT8) buf[5];
*value <<= 8;
- *value |= (buf[6] & 0xff);
+ *value |= (UINT8) buf[6];
*value <<= 8;
- *value |= (buf[7] & 0xff);
+ *value |= (UINT8) buf[7];
return 1;
}
}
-#else
-size_t
-Bigendian_fread_uint8 (UINT8 *value, FILE *fp) {
- unsigned char buf[8];
-
- if (fread(buf,sizeof(unsigned char),8,fp) < 8) {
- /* Should set error indicator for stream and set errno */
- return 0;
- } else {
- *value = (buf[7] & 0xff);
- *value <<= 8;
- *value = (buf[6] & 0xff);
- *value <<= 8;
- *value = (buf[5] & 0xff);
- *value <<= 8;
- *value = (buf[4] & 0xff);
- *value <<= 8;
- *value = (buf[3] & 0xff);
- *value <<= 8;
- *value |= (buf[2] & 0xff);
- *value <<= 8;
- *value |= (buf[1] & 0xff);
- *value <<= 8;
- *value |= (buf[0] & 0xff);
- return 1;
- }
-}
-#endif
-#ifdef OUTPUT_BIGENDIAN
size_t
Bigendian_fread_uint8s (UINT8 *array, int n, FILE *fp) {
unsigned char buf[8];
@@ -692,112 +415,97 @@ Bigendian_fread_uint8s (UINT8 *array, int n, FILE *fp) {
/* Should set error indicator for stream and set errno */
return 0;
} else {
- value = (buf[0] & 0xff);
+ value = (UINT8) buf[0];
value <<= 8;
- value |= (buf[1] & 0xff);
+ value |= (UINT8) buf[1];
value <<= 8;
- value |= (buf[2] & 0xff);
+ value |= (UINT8) buf[2];
value <<= 8;
- value |= (buf[3] & 0xff);
+ value |= (UINT8) buf[3];
value <<= 8;
- value |= (buf[4] & 0xff);
+ value |= (UINT8) buf[4];
value <<= 8;
- value |= (buf[5] & 0xff);
+ value |= (UINT8) buf[5];
value <<= 8;
- value |= (buf[6] & 0xff);
+ value |= (UINT8) buf[6];
value <<= 8;
- value |= (buf[7] & 0xff);
-
+ value |= (UINT8) buf[7];
array[i] = value;
}
}
return n;
}
-#else
-size_t
-Bigendian_fread_uint8s (UINT8 *array, int n, FILE *fp) {
- unsigned char buf[8];
- UINT8 value;
- int i;
- for (i = 0; i < n; i++) {
- if (fread(buf,sizeof(unsigned char),8,fp) < 8) {
- /* Should set error indicator for stream and set errno */
- return 0;
- } else {
- value = (buf[7] & 0xff);
- value <<= 8;
- value = (buf[6] & 0xff);
- value <<= 8;
- value = (buf[5] & 0xff);
- value <<= 8;
- value = (buf[4] & 0xff);
- value <<= 8;
- value = (buf[3] & 0xff);
- value <<= 8;
- value |= (buf[2] & 0xff);
- value <<= 8;
- value |= (buf[1] & 0xff);
- value <<= 8;
- value |= (buf[0] & 0xff);
- array[i] = value;
- }
- }
- return n;
-}
-#endif
-
-#ifdef OUTPUT_BIGENDIAN
UINT8
Bigendian_fileio_read_uint8 (int fd) {
UINT8 value = 0LU;
unsigned char buf[8];
read(fd,buf,8);
- value = (buf[0] & 0xff);
+ value = (UINT8) buf[0];
value <<= 8;
- value |= (buf[1] & 0xff);
+ value |= (UINT8) buf[1];
value <<= 8;
- value |= (buf[2] & 0xff);
+ value |= (UINT8) buf[2];
value <<= 8;
- value |= (buf[3] & 0xff);
+ value |= (UINT8) buf[3];
value <<= 8;
- value |= (buf[4] & 0xff);
+ value |= (UINT8) buf[4];
value <<= 8;
- value |= (buf[5] & 0xff);
+ value |= (UINT8) buf[5];
value <<= 8;
- value |= (buf[6] & 0xff);
+ value |= (UINT8) buf[6];
value <<= 8;
- value |= (buf[7] & 0xff);
+ value |= (UINT8) buf[7];
return value;
}
-#else
-UINT8
-Bigendian_fileio_read_uint8 (int fd) {
- UINT8 value = 0LU;
- unsigned char buf[8];
- read(fd,buf,8);
- value = (buf[7] & 0xff);
- value <<= 8;
- value = (buf[6] & 0xff);
- value <<= 8;
- value = (buf[5] & 0xff);
- value <<= 8;
- value = (buf[4] & 0xff);
- value <<= 8;
- value = (buf[3] & 0xff);
- value <<= 8;
- value |= (buf[2] & 0xff);
- value <<= 8;
- value |= (buf[1] & 0xff);
- value <<= 8;
- value |= (buf[0] & 0xff);
- return value;
+#endif /* HAVE_64_BIT */
+
+
+/************************************************************************
+ * Double
+ ************************************************************************/
+
+size_t
+Bigendian_fwrite_double (double value, FILE *fp) {
+ unsigned char buf[8], *ptr = (unsigned char *) &value;
+ size_t i, j;
+
+ /* Write value to fp with its in-memory bytes reversed; buf[8] relies on sizeof(double) == 8 */
+
+ i = 0;
+ j = sizeof(double);
+ while (i < sizeof(double)) {
+ buf[i++] = ptr[--j];
+ }
+
+ if (fwrite(buf,sizeof(unsigned char),sizeof(double),fp) < sizeof(double)) {
+ /* Should set error indicator for stream and set errno */
+ /* A short write is a failure too: a truncated double cannot be read back */
+ return 0;
+ } else {
+ /* NOTE(review): success returns sizeof(double)/4 rather than 1 -- confirm callers expect this */
+ return sizeof(double)/4;
+ }
}
-#endif
-#endif /* HAVE_64_BIT */
+double
+Bigendian_convert_double (double value) {
+ unsigned char *ptr = (unsigned char *) &value, temp;
+ size_t i, j;
+ /* Return value with the order of its bytes reversed */
+ i = 0;
+ j = sizeof(double);
+ while (i < sizeof(double)/2) {
+ /* swap ptr[i] with its mirror; stop at the midpoint or every pair gets swapped back */
+ temp = ptr[--j];
+ ptr[j] = ptr[i];
+ ptr[i++] = temp;
+ }
+
+ return value;
+}
diff --git a/src/bigendian.h b/src/bigendian.h
index 7c0528f..46f15e3 100644
--- a/src/bigendian.h
+++ b/src/bigendian.h
@@ -1,4 +1,4 @@
-/* $Id: bigendian.h 157223 2015-01-22 18:43:01Z twu $ */
+/* $Id: bigendian.h 168395 2015-06-26 17:13:13Z twu $ */
#ifndef BIGENDIAN_INCLUDED
#define BIGENDIAN_INCLUDED
#ifdef HAVE_CONFIG_H
@@ -9,6 +9,11 @@
#include <stddef.h>
#include "types.h"
+extern void
+Bigendian_write_char (unsigned char value, int fd);
+
+
+
extern int
Bigendian_convert_int (int littleendian);
extern size_t
@@ -49,9 +54,27 @@ extern size_t
Bigendian_fread_uint8s (UINT8 *array, int n, FILE *fp);
extern UINT8
Bigendian_fileio_read_uint8 (int fd);
+
+#ifdef UTILITYP
+#define Bigendian_convert_univcoord Bigendian_convert_uint8
+#elif defined(LARGE_GENOMES)
+#define Bigendian_convert_univcoord Bigendian_convert_uint8
+#else
+#define Bigendian_convert_univcoord Bigendian_convert_uint
#endif
+#else
+#define Bigendian_convert_univcoord Bigendian_convert_uint
+#endif
+
+
+extern double
+Bigendian_convert_double (double value);
+extern size_t
+Bigendian_fwrite_double (double value, FILE *fp);
+
+#define FREAD_CHAR(p,fp) fread(p,sizeof(unsigned char),1,fp)
#define FREAD_INT(p,fp) Bigendian_fread_int(p,fp)
#define FREAD_UINT(p,fp) Bigendian_fread_uint(p,fp)
#define FREAD_INTS(a,n,fp) Bigendian_fread_ints(a,n,fp)
@@ -61,8 +84,11 @@ Bigendian_fileio_read_uint8 (int fd);
#define FREAD_UINT8S(a,n,fp) Bigendian_fread_uint8s(a,n,fp)
#endif
+#define FWRITE_CHAR(x,fp) fwrite(&(x),sizeof(unsigned char),1,fp)
#define FWRITE_INT(x,fp) Bigendian_fwrite_int(x,fp)
#define FWRITE_UINT(x,fp) Bigendian_fwrite_uint(x,fp)
+#define FWRITE_DOUBLE(x,fp) Bigendian_fwrite_double(x,fp)
+#define WRITE_CHAR(x,fd) Bigendian_write_char(x,fd)
#define WRITE_UINT(x,fd) Bigendian_write_uint(x,fd)
#define WRITE_UINT8(x,fd) Bigendian_write_uin8t(x,fd)
#define FWRITE_INTS(a,n,fp) Bigendian_fwrite_ints(a,n,fp)
diff --git a/src/bitpack64-access.c b/src/bitpack64-access.c
index c4621dd..d69cc43 100644
--- a/src/bitpack64-access.c
+++ b/src/bitpack64-access.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-access.c 132144 2014-04-02 16:02:28Z twu $";
+static char rcsid[] = "$Id: bitpack64-access.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -8,6 +8,14 @@ static char rcsid[] = "$Id: bitpack64-access.c 132144 2014-04-02 16:02:28Z twu $
#include <stdio.h>
#include <stdlib.h>
+#ifdef WORDS_BIGENDIAN
+#include "bigendian.h"
+#define CONVERT(x) Bigendian_convert_uint(x)
+#else
+#define CONVERT(x) (x) /* no byte swap; parenthesized so expression arguments expand safely */
+#endif /* WORDS_BIGENDIAN */
+
+
#ifdef DEBUG
#define debug(x) x
#else
@@ -35,232 +43,232 @@ access_00 (const UINT4 *in) {
static UINT4
access_02_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 2 ) ;
}
static UINT4
access_02_01 (const UINT4 *in) {
- return ( (*in) >> 2 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 2 ) % (1U << 2 ) ;
}
static UINT4
access_02_02 (const UINT4 *in) {
- return ( (*in) >> 4 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 2 ) ;
}
static UINT4
access_02_03 (const UINT4 *in) {
- return ( (*in) >> 6 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 6 ) % (1U << 2 ) ;
}
static UINT4
access_02_04 (const UINT4 *in) {
- return ( (*in) >> 8 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 2 ) ;
}
static UINT4
access_02_05 (const UINT4 *in) {
- return ( (*in) >> 10 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 10 ) % (1U << 2 ) ;
}
static UINT4
access_02_06 (const UINT4 *in) {
- return ( (*in) >> 12 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 12 ) % (1U << 2 ) ;
}
static UINT4
access_02_07 (const UINT4 *in) {
- return ( (*in) >> 14 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 14 ) % (1U << 2 ) ;
}
static UINT4
access_02_08 (const UINT4 *in) {
- return ( (*in) >> 16 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 2 ) ;
}
static UINT4
access_02_09 (const UINT4 *in) {
- return ( (*in) >> 18 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 18 ) % (1U << 2 ) ;
}
static UINT4
access_02_10 (const UINT4 *in) {
- return ( (*in) >> 20 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 20 ) % (1U << 2 ) ;
}
static UINT4
access_02_11 (const UINT4 *in) {
- return ( (*in) >> 22 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 22 ) % (1U << 2 ) ;
}
static UINT4
access_02_12 (const UINT4 *in) {
- return ( (*in) >> 24 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 24 ) % (1U << 2 ) ;
}
static UINT4
access_02_13 (const UINT4 *in) {
- return ( (*in) >> 26 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 26 ) % (1U << 2 ) ;
}
static UINT4
access_02_14 (const UINT4 *in) {
- return ( (*in) >> 28 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 28 ) % (1U << 2 ) ;
}
static UINT4
access_02_15 (const UINT4 *in) {
- return ( (*in) >> 30 ) % (1U << 2 ) ;
+ return ( CONVERT(*in) >> 30 ) % (1U << 2 ) ;
}
static UINT4
access_04_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 4 ) ;
}
static UINT4
access_04_01 (const UINT4 *in) {
- return ( (*in) >> 4 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 4 ) ;
}
static UINT4
access_04_02 (const UINT4 *in) {
- return ( (*in) >> 8 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 4 ) ;
}
static UINT4
access_04_03 (const UINT4 *in) {
- return ( (*in) >> 12 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 12 ) % (1U << 4 ) ;
}
static UINT4
access_04_04 (const UINT4 *in) {
- return ( (*in) >> 16 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 4 ) ;
}
static UINT4
access_04_05 (const UINT4 *in) {
- return ( (*in) >> 20 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 20 ) % (1U << 4 ) ;
}
static UINT4
access_04_06 (const UINT4 *in) {
- return ( (*in) >> 24 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 24 ) % (1U << 4 ) ;
}
static UINT4
access_04_07 (const UINT4 *in) {
- return ( (*in) >> 28 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 28 ) % (1U << 4 ) ;
}
static UINT4
access_04_08 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 4 ) ;
}
static UINT4
access_04_09 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 4 ) ;
}
static UINT4
access_04_10 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 4 ) ;
}
static UINT4
access_04_11 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 12 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 12 ) % (1U << 4 ) ;
}
static UINT4
access_04_12 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 4 ) ;
}
static UINT4
access_04_13 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 20 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 20 ) % (1U << 4 ) ;
}
static UINT4
access_04_14 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 24 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 24 ) % (1U << 4 ) ;
}
static UINT4
access_04_15 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 28 ) % (1U << 4 ) ;
+ return ( CONVERT(*in) >> 28 ) % (1U << 4 ) ;
}
static UINT4
access_06_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 6 ) ;
}
static UINT4
access_06_01 (const UINT4 *in) {
- return ( (*in) >> 6 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 6 ) % (1U << 6 ) ;
}
static UINT4
access_06_02 (const UINT4 *in) {
- return ( (*in) >> 12 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 12 ) % (1U << 6 ) ;
}
static UINT4
access_06_03 (const UINT4 *in) {
- return ( (*in) >> 18 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 18 ) % (1U << 6 ) ;
}
static UINT4
access_06_04 (const UINT4 *in) {
- return ( (*in) >> 24 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 24 ) % (1U << 6 ) ;
}
static UINT4
access_06_05 (const UINT4 *in) {
UINT4 out;
- out = ( (*in) >> 30 ) % (1U << 6 ) ;
+ out = ( CONVERT(*in) >> 30 ) % (1U << 6 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 6 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 6 - 4 );
return out;
}
static UINT4
access_06_06 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 6 ) ;
}
static UINT4
access_06_07 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 10 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 10 ) % (1U << 6 ) ;
}
static UINT4
access_06_08 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 6 ) ;
}
static UINT4
access_06_09 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 22 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 22 ) % (1U << 6 ) ;
}
static UINT4
@@ -268,171 +276,171 @@ access_06_10 (const UINT4 *in) {
UINT4 out;
in += 1 * WORD_INCR;
- out = ( (*in) >> 28 ) % (1U << 6 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 6 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 2 ))<<( 6 - 2 );
+ out |= (CONVERT(*in) % (1U<< 2 ))<<( 6 - 2 );
return out;
}
static UINT4
access_06_11 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 2 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 2 ) % (1U << 6 ) ;
}
static UINT4
access_06_12 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 6 ) ;
}
static UINT4
access_06_13 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 14 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 14 ) % (1U << 6 ) ;
}
static UINT4
access_06_14 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 20 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 20 ) % (1U << 6 ) ;
}
static UINT4
access_06_15 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 26 ) % (1U << 6 ) ;
+ return ( CONVERT(*in) >> 26 ) % (1U << 6 ) ;
}
static UINT4
access_08_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 8 ) ;
}
static UINT4
access_08_01 (const UINT4 *in) {
- return ( (*in) >> 8 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 8 ) ;
}
static UINT4
access_08_02 (const UINT4 *in) {
- return ( (*in) >> 16 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 8 ) ;
}
static UINT4
access_08_03 (const UINT4 *in) {
- return ( (*in) >> 24 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 24 ) % (1U << 8 ) ;
}
static UINT4
access_08_04 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 8 ) ;
}
static UINT4
access_08_05 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 8 ) ;
}
static UINT4
access_08_06 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 8 ) ;
}
static UINT4
access_08_07 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 24 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 24 ) % (1U << 8 ) ;
}
static UINT4
access_08_08 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 8 ) ;
}
static UINT4
access_08_09 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 8 ) ;
}
static UINT4
access_08_10 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 8 ) ;
}
static UINT4
access_08_11 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 24 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 24 ) % (1U << 8 ) ;
}
static UINT4
access_08_12 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 8 ) ;
}
static UINT4
access_08_13 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 8 ) ;
}
static UINT4
access_08_14 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 8 ) ;
}
static UINT4
access_08_15 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 24 ) % (1U << 8 ) ;
+ return ( CONVERT(*in) >> 24 ) % (1U << 8 ) ;
}
static UINT4
access_10_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 10 ) ;
}
static UINT4
access_10_01 (const UINT4 *in) {
- return ( (*in) >> 10 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 10 ) % (1U << 10 ) ;
}
static UINT4
access_10_02 (const UINT4 *in) {
- return ( (*in) >> 20 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 20 ) % (1U << 10 ) ;
}
static UINT4
access_10_03 (const UINT4 *in) {
UINT4 out;
- out = ( (*in) >> 30 ) % (1U << 10 ) ;
+ out = ( CONVERT(*in) >> 30 ) % (1U << 10 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 10 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 10 - 8 );
return out;
}
static UINT4
access_10_04 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 10 ) ;
}
static UINT4
access_10_05 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 18 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 18 ) % (1U << 10 ) ;
}
static UINT4
@@ -440,22 +448,22 @@ access_10_06 (const UINT4 *in) {
UINT4 out;
in += 1 * WORD_INCR;
- out = ( (*in) >> 28 ) % (1U << 10 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 10 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 6 ))<<( 10 - 6 );
+ out |= (CONVERT(*in) % (1U<< 6 ))<<( 10 - 6 );
return out;
}
static UINT4
access_10_07 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 6 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 6 ) % (1U << 10 ) ;
}
static UINT4
access_10_08 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 10 ) ;
}
static UINT4
@@ -463,22 +471,22 @@ access_10_09 (const UINT4 *in) {
UINT4 out;
in += 2 * WORD_INCR;
- out = ( (*in) >> 26 ) % (1U << 10 ) ;
+ out = ( CONVERT(*in) >> 26 ) % (1U << 10 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 10 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 10 - 4 );
return out;
}
static UINT4
access_10_10 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 10 ) ;
}
static UINT4
access_10_11 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 14 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 14 ) % (1U << 10 ) ;
}
static UINT4
@@ -486,61 +494,61 @@ access_10_12 (const UINT4 *in) {
UINT4 out;
in += 3 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 10 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 10 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 2 ))<<( 10 - 2 );
+ out |= (CONVERT(*in) % (1U<< 2 ))<<( 10 - 2 );
return out;
}
static UINT4
access_10_13 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return ( (*in) >> 2 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 2 ) % (1U << 10 ) ;
}
static UINT4
access_10_14 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return ( (*in) >> 12 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 12 ) % (1U << 10 ) ;
}
static UINT4
access_10_15 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return ( (*in) >> 22 ) % (1U << 10 ) ;
+ return ( CONVERT(*in) >> 22 ) % (1U << 10 ) ;
}
static UINT4
access_12_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 12 ) ;
}
static UINT4
access_12_01 (const UINT4 *in) {
- return ( (*in) >> 12 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 12 ) % (1U << 12 ) ;
}
static UINT4
access_12_02 (const UINT4 *in) {
UINT4 out;
- out = ( (*in) >> 24 ) % (1U << 12 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 12 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 12 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 12 - 4 );
return out;
}
static UINT4
access_12_03 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 12 ) ;
}
static UINT4
access_12_04 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 12 ) ;
}
static UINT4
@@ -548,34 +556,34 @@ access_12_05 (const UINT4 *in) {
UINT4 out;
in += 1 * WORD_INCR;
- out = ( (*in) >> 28 ) % (1U << 12 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 12 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 12 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 12 - 8 );
return out;
}
static UINT4
access_12_06 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 12 ) ;
}
static UINT4
access_12_07 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 20 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 20 ) % (1U << 12 ) ;
}
static UINT4
access_12_08 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 12 ) ;
}
static UINT4
access_12_09 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 12 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 12 ) % (1U << 12 ) ;
}
static UINT4
@@ -583,22 +591,22 @@ access_12_10 (const UINT4 *in) {
UINT4 out;
in += 3 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 12 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 12 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 12 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 12 - 4 );
return out;
}
static UINT4
access_12_11 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 12 ) ;
}
static UINT4
access_12_12 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 12 ) ;
}
static UINT4
@@ -606,49 +614,49 @@ access_12_13 (const UINT4 *in) {
UINT4 out;
in += 4 * WORD_INCR;
- out = ( (*in) >> 28 ) % (1U << 12 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 12 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 12 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 12 - 8 );
return out;
}
static UINT4
access_12_14 (const UINT4 *in) {
in += 5 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 12 ) ;
}
static UINT4
access_12_15 (const UINT4 *in) {
in += 5 * WORD_INCR;
- return ( (*in) >> 20 ) % (1U << 12 ) ;
+ return ( CONVERT(*in) >> 20 ) % (1U << 12 ) ;
}
static UINT4
access_14_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 14 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 14 ) ;
}
static UINT4
access_14_01 (const UINT4 *in) {
- return ( (*in) >> 14 ) % (1U << 14 ) ;
+ return ( CONVERT(*in) >> 14 ) % (1U << 14 ) ;
}
static UINT4
access_14_02 (const UINT4 *in) {
UINT4 out;
- out = ( (*in) >> 28 ) % (1U << 14 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 14 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 10 ))<<( 14 - 10 );
+ out |= (CONVERT(*in) % (1U<< 10 ))<<( 14 - 10 );
return out;
}
static UINT4
access_14_03 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 10 ) % (1U << 14 ) ;
+ return ( CONVERT(*in) >> 10 ) % (1U << 14 ) ;
}
static UINT4
@@ -656,16 +664,16 @@ access_14_04 (const UINT4 *in) {
UINT4 out;
in += 1 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 14 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 14 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 6 ))<<( 14 - 6 );
+ out |= (CONVERT(*in) % (1U<< 6 ))<<( 14 - 6 );
return out;
}
static UINT4
access_14_05 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 6 ) % (1U << 14 ) ;
+ return ( CONVERT(*in) >> 6 ) % (1U << 14 ) ;
}
static UINT4
@@ -673,22 +681,22 @@ access_14_06 (const UINT4 *in) {
UINT4 out;
in += 2 * WORD_INCR;
- out = ( (*in) >> 20 ) % (1U << 14 ) ;
+ out = ( CONVERT(*in) >> 20 ) % (1U << 14 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 2 ))<<( 14 - 2 );
+ out |= (CONVERT(*in) % (1U<< 2 ))<<( 14 - 2 );
return out;
}
static UINT4
access_14_07 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 2 ) % (1U << 14 ) ;
+ return ( CONVERT(*in) >> 2 ) % (1U << 14 ) ;
}
static UINT4
access_14_08 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 14 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 14 ) ;
}
static UINT4
@@ -696,16 +704,16 @@ access_14_09 (const UINT4 *in) {
UINT4 out;
in += 3 * WORD_INCR;
- out = ( (*in) >> 30 ) % (1U << 14 ) ;
+ out = ( CONVERT(*in) >> 30 ) % (1U << 14 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 12 ))<<( 14 - 12 );
+ out |= (CONVERT(*in) % (1U<< 12 ))<<( 14 - 12 );
return out;
}
static UINT4
access_14_10 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return ( (*in) >> 12 ) % (1U << 14 ) ;
+ return ( CONVERT(*in) >> 12 ) % (1U << 14 ) ;
}
static UINT4
@@ -713,16 +721,16 @@ access_14_11 (const UINT4 *in) {
UINT4 out;
in += 4 * WORD_INCR;
- out = ( (*in) >> 26 ) % (1U << 14 ) ;
+ out = ( CONVERT(*in) >> 26 ) % (1U << 14 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 14 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 14 - 8 );
return out;
}
static UINT4
access_14_12 (const UINT4 *in) {
in += 5 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 14 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 14 ) ;
}
static UINT4
@@ -730,139 +738,139 @@ access_14_13 (const UINT4 *in) {
UINT4 out;
in += 5 * WORD_INCR;
- out = ( (*in) >> 22 ) % (1U << 14 ) ;
+ out = ( CONVERT(*in) >> 22 ) % (1U << 14 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 14 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 14 - 4 );
return out;
}
static UINT4
access_14_14 (const UINT4 *in) {
in += 6 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 14 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 14 ) ;
}
static UINT4
access_14_15 (const UINT4 *in) {
in += 6 * WORD_INCR;
- return ( (*in) >> 18 ) % (1U << 14 ) ;
+ return ( CONVERT(*in) >> 18 ) % (1U << 14 ) ;
}
static UINT4
access_16_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 16 ) ;
}
static UINT4
access_16_01 (const UINT4 *in) {
- return ( (*in) >> 16 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 16 ) ;
}
static UINT4
access_16_02 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 16 ) ;
}
static UINT4
access_16_03 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 16 ) ;
}
static UINT4
access_16_04 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 16 ) ;
}
static UINT4
access_16_05 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 16 ) ;
}
static UINT4
access_16_06 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 16 ) ;
}
static UINT4
access_16_07 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 16 ) ;
}
static UINT4
access_16_08 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 16 ) ;
}
static UINT4
access_16_09 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 16 ) ;
}
static UINT4
access_16_10 (const UINT4 *in) {
in += 5 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 16 ) ;
}
static UINT4
access_16_11 (const UINT4 *in) {
in += 5 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 16 ) ;
}
static UINT4
access_16_12 (const UINT4 *in) {
in += 6 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 16 ) ;
}
static UINT4
access_16_13 (const UINT4 *in) {
in += 6 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 16 ) ;
}
static UINT4
access_16_14 (const UINT4 *in) {
in += 7 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 16 ) ;
}
static UINT4
access_16_15 (const UINT4 *in) {
in += 7 * WORD_INCR;
- return ( (*in) >> 16 ) % (1U << 16 ) ;
+ return ( CONVERT(*in) >> 16 ) % (1U << 16 ) ;
}
static UINT4
access_18_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 18 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 18 ) ;
}
static UINT4
access_18_01 (const UINT4 *in) {
UINT4 out;
- out = ( (*in) >> 18 ) % (1U << 18 ) ;
+ out = ( CONVERT(*in) >> 18 ) % (1U << 18 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 18 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 18 - 4 );
return out;
}
static UINT4
access_18_02 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 18 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 18 ) ;
}
static UINT4
@@ -870,16 +878,16 @@ access_18_03 (const UINT4 *in) {
UINT4 out;
in += 1 * WORD_INCR;
- out = ( (*in) >> 22 ) % (1U << 18 ) ;
+ out = ( CONVERT(*in) >> 22 ) % (1U << 18 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 18 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 18 - 8 );
return out;
}
static UINT4
access_18_04 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 18 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 18 ) ;
}
static UINT4
@@ -887,16 +895,16 @@ access_18_05 (const UINT4 *in) {
UINT4 out;
in += 2 * WORD_INCR;
- out = ( (*in) >> 26 ) % (1U << 18 ) ;
+ out = ( CONVERT(*in) >> 26 ) % (1U << 18 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 12 ))<<( 18 - 12 );
+ out |= (CONVERT(*in) % (1U<< 12 ))<<( 18 - 12 );
return out;
}
static UINT4
access_18_06 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 12 ) % (1U << 18 ) ;
+ return ( CONVERT(*in) >> 12 ) % (1U << 18 ) ;
}
static UINT4
@@ -904,9 +912,9 @@ access_18_07 (const UINT4 *in) {
UINT4 out;
in += 3 * WORD_INCR;
- out = ( (*in) >> 30 ) % (1U << 18 ) ;
+ out = ( CONVERT(*in) >> 30 ) % (1U << 18 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 18 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 18 - 16 );
return out;
}
@@ -915,16 +923,16 @@ access_18_08 (const UINT4 *in) {
UINT4 out;
in += 4 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 18 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 18 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 2 ))<<( 18 - 2 );
+ out |= (CONVERT(*in) % (1U<< 2 ))<<( 18 - 2 );
return out;
}
static UINT4
access_18_09 (const UINT4 *in) {
in += 5 * WORD_INCR;
- return ( (*in) >> 2 ) % (1U << 18 ) ;
+ return ( CONVERT(*in) >> 2 ) % (1U << 18 ) ;
}
static UINT4
@@ -932,16 +940,16 @@ access_18_10 (const UINT4 *in) {
UINT4 out;
in += 5 * WORD_INCR;
- out = ( (*in) >> 20 ) % (1U << 18 ) ;
+ out = ( CONVERT(*in) >> 20 ) % (1U << 18 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 6 ))<<( 18 - 6 );
+ out |= (CONVERT(*in) % (1U<< 6 ))<<( 18 - 6 );
return out;
}
static UINT4
access_18_11 (const UINT4 *in) {
in += 6 * WORD_INCR;
- return ( (*in) >> 6 ) % (1U << 18 ) ;
+ return ( CONVERT(*in) >> 6 ) % (1U << 18 ) ;
}
static UINT4
@@ -949,16 +957,16 @@ access_18_12 (const UINT4 *in) {
UINT4 out;
in += 6 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 18 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 18 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 10 ))<<( 18 - 10 );
+ out |= (CONVERT(*in) % (1U<< 10 ))<<( 18 - 10 );
return out;
}
static UINT4
access_18_13 (const UINT4 *in) {
in += 7 * WORD_INCR;
- return ( (*in) >> 10 ) % (1U << 18 ) ;
+ return ( CONVERT(*in) >> 10 ) % (1U << 18 ) ;
}
static UINT4
@@ -966,38 +974,38 @@ access_18_14 (const UINT4 *in) {
UINT4 out;
in += 7 * WORD_INCR;
- out = ( (*in) >> 28 ) % (1U << 18 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 18 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 14 ))<<( 18 - 14 );
+ out |= (CONVERT(*in) % (1U<< 14 ))<<( 18 - 14 );
return out;
}
static UINT4
access_18_15 (const UINT4 *in) {
in += 8 * WORD_INCR;
- return ( (*in) >> 14 ) % (1U << 18 ) ;
+ return ( CONVERT(*in) >> 14 ) % (1U << 18 ) ;
}
static UINT4
access_20_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 20 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 20 ) ;
}
static UINT4
access_20_01 (const UINT4 *in) {
UINT4 out;
- out = ( (*in) >> 20 ) % (1U << 20 ) ;
+ out = ( CONVERT(*in) >> 20 ) % (1U << 20 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 20 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 20 - 8 );
return out;
}
static UINT4
access_20_02 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 20 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 20 ) ;
}
static UINT4
@@ -1005,9 +1013,9 @@ access_20_03 (const UINT4 *in) {
UINT4 out;
in += 1 * WORD_INCR;
- out = ( (*in) >> 28 ) % (1U << 20 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 20 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 20 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 20 - 16 );
return out;
}
@@ -1016,16 +1024,16 @@ access_20_04 (const UINT4 *in) {
UINT4 out;
in += 2 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 20 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 20 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 20 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 20 - 4 );
return out;
}
static UINT4
access_20_05 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 20 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 20 ) ;
}
static UINT4
@@ -1033,22 +1041,22 @@ access_20_06 (const UINT4 *in) {
UINT4 out;
in += 3 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 20 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 20 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 12 ))<<( 20 - 12 );
+ out |= (CONVERT(*in) % (1U<< 12 ))<<( 20 - 12 );
return out;
}
static UINT4
access_20_07 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return ( (*in) >> 12 ) % (1U << 20 ) ;
+ return ( CONVERT(*in) >> 12 ) % (1U << 20 ) ;
}
static UINT4
access_20_08 (const UINT4 *in) {
in += 5 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 20 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 20 ) ;
}
static UINT4
@@ -1056,16 +1064,16 @@ access_20_09 (const UINT4 *in) {
UINT4 out;
in += 5 * WORD_INCR;
- out = ( (*in) >> 20 ) % (1U << 20 ) ;
+ out = ( CONVERT(*in) >> 20 ) % (1U << 20 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 20 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 20 - 8 );
return out;
}
static UINT4
access_20_10 (const UINT4 *in) {
in += 6 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 20 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 20 ) ;
}
static UINT4
@@ -1073,9 +1081,9 @@ access_20_11 (const UINT4 *in) {
UINT4 out;
in += 6 * WORD_INCR;
- out = ( (*in) >> 28 ) % (1U << 20 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 20 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 20 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 20 - 16 );
return out;
}
@@ -1084,16 +1092,16 @@ access_20_12 (const UINT4 *in) {
UINT4 out;
in += 7 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 20 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 20 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 20 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 20 - 4 );
return out;
}
static UINT4
access_20_13 (const UINT4 *in) {
in += 8 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 20 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 20 ) ;
}
static UINT4
@@ -1101,31 +1109,31 @@ access_20_14 (const UINT4 *in) {
UINT4 out;
in += 8 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 20 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 20 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 12 ))<<( 20 - 12 );
+ out |= (CONVERT(*in) % (1U<< 12 ))<<( 20 - 12 );
return out;
}
static UINT4
access_20_15 (const UINT4 *in) {
in += 9 * WORD_INCR;
- return ( (*in) >> 12 ) % (1U << 20 ) ;
+ return ( CONVERT(*in) >> 12 ) % (1U << 20 ) ;
}
static UINT4
access_22_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 22 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 22 ) ;
}
static UINT4
access_22_01 (const UINT4 *in) {
UINT4 out;
- out = ( (*in) >> 22 ) % (1U << 22 ) ;
+ out = ( CONVERT(*in) >> 22 ) % (1U << 22 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 12 ))<<( 22 - 12 );
+ out |= (CONVERT(*in) % (1U<< 12 ))<<( 22 - 12 );
return out;
}
@@ -1134,16 +1142,16 @@ access_22_02 (const UINT4 *in) {
UINT4 out;
in += 1 * WORD_INCR;
- out = ( (*in) >> 12 ) % (1U << 22 ) ;
+ out = ( CONVERT(*in) >> 12 ) % (1U << 22 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 2 ))<<( 22 - 2 );
+ out |= (CONVERT(*in) % (1U<< 2 ))<<( 22 - 2 );
return out;
}
static UINT4
access_22_03 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 2 ) % (1U << 22 ) ;
+ return ( CONVERT(*in) >> 2 ) % (1U << 22 ) ;
}
static UINT4
@@ -1151,9 +1159,9 @@ access_22_04 (const UINT4 *in) {
UINT4 out;
in += 2 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 22 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 22 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 14 ))<<( 22 - 14 );
+ out |= (CONVERT(*in) % (1U<< 14 ))<<( 22 - 14 );
return out;
}
@@ -1162,16 +1170,16 @@ access_22_05 (const UINT4 *in) {
UINT4 out;
in += 3 * WORD_INCR;
- out = ( (*in) >> 14 ) % (1U << 22 ) ;
+ out = ( CONVERT(*in) >> 14 ) % (1U << 22 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 22 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 22 - 4 );
return out;
}
static UINT4
access_22_06 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 22 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 22 ) ;
}
static UINT4
@@ -1179,9 +1187,9 @@ access_22_07 (const UINT4 *in) {
UINT4 out;
in += 4 * WORD_INCR;
- out = ( (*in) >> 26 ) % (1U << 22 ) ;
+ out = ( CONVERT(*in) >> 26 ) % (1U << 22 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 22 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 22 - 16 );
return out;
}
@@ -1190,16 +1198,16 @@ access_22_08 (const UINT4 *in) {
UINT4 out;
in += 5 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 22 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 22 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 6 ))<<( 22 - 6 );
+ out |= (CONVERT(*in) % (1U<< 6 ))<<( 22 - 6 );
return out;
}
static UINT4
access_22_09 (const UINT4 *in) {
in += 6 * WORD_INCR;
- return ( (*in) >> 6 ) % (1U << 22 ) ;
+ return ( CONVERT(*in) >> 6 ) % (1U << 22 ) ;
}
static UINT4
@@ -1207,9 +1215,9 @@ access_22_10 (const UINT4 *in) {
UINT4 out;
in += 6 * WORD_INCR;
- out = ( (*in) >> 28 ) % (1U << 22 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 22 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 18 ))<<( 22 - 18 );
+ out |= (CONVERT(*in) % (1U<< 18 ))<<( 22 - 18 );
return out;
}
@@ -1218,16 +1226,16 @@ access_22_11 (const UINT4 *in) {
UINT4 out;
in += 7 * WORD_INCR;
- out = ( (*in) >> 18 ) % (1U << 22 ) ;
+ out = ( CONVERT(*in) >> 18 ) % (1U << 22 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 22 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 22 - 8 );
return out;
}
static UINT4
access_22_12 (const UINT4 *in) {
in += 8 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 22 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 22 ) ;
}
static UINT4
@@ -1235,9 +1243,9 @@ access_22_13 (const UINT4 *in) {
UINT4 out;
in += 8 * WORD_INCR;
- out = ( (*in) >> 30 ) % (1U << 22 ) ;
+ out = ( CONVERT(*in) >> 30 ) % (1U << 22 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 20 ))<<( 22 - 20 );
+ out |= (CONVERT(*in) % (1U<< 20 ))<<( 22 - 20 );
return out;
}
@@ -1246,32 +1254,32 @@ access_22_14 (const UINT4 *in) {
UINT4 out;
in += 9 * WORD_INCR;
- out = ( (*in) >> 20 ) % (1U << 22 ) ;
+ out = ( CONVERT(*in) >> 20 ) % (1U << 22 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 10 ))<<( 22 - 10 );
+ out |= (CONVERT(*in) % (1U<< 10 ))<<( 22 - 10 );
return out;
}
static UINT4
access_22_15 (const UINT4 *in) {
in += 10 * WORD_INCR;
- return ( (*in) >> 10 ) % (1U << 22 ) ;
+ return ( CONVERT(*in) >> 10 ) % (1U << 22 ) ;
}
static UINT4
access_24_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 24 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 24 ) ;
}
static UINT4
access_24_01 (const UINT4 *in) {
UINT4 out;
- out = ( (*in) >> 24 ) % (1U << 24 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 24 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 24 - 16 );
return out;
}
@@ -1280,22 +1288,22 @@ access_24_02 (const UINT4 *in) {
UINT4 out;
in += 1 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 24 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 24 - 8 );
return out;
}
static UINT4
access_24_03 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 24 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 24 ) ;
}
static UINT4
access_24_04 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 24 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 24 ) ;
}
static UINT4
@@ -1303,9 +1311,9 @@ access_24_05 (const UINT4 *in) {
UINT4 out;
in += 3 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 24 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 24 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 24 - 16 );
return out;
}
@@ -1314,22 +1322,22 @@ access_24_06 (const UINT4 *in) {
UINT4 out;
in += 4 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 24 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 24 - 8 );
return out;
}
static UINT4
access_24_07 (const UINT4 *in) {
in += 5 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 24 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 24 ) ;
}
static UINT4
access_24_08 (const UINT4 *in) {
in += 6 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 24 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 24 ) ;
}
static UINT4
@@ -1337,9 +1345,9 @@ access_24_09 (const UINT4 *in) {
UINT4 out;
in += 6 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 24 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 24 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 24 - 16 );
return out;
}
@@ -1348,22 +1356,22 @@ access_24_10 (const UINT4 *in) {
UINT4 out;
in += 7 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 24 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 24 - 8 );
return out;
}
static UINT4
access_24_11 (const UINT4 *in) {
in += 8 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 24 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 24 ) ;
}
static UINT4
access_24_12 (const UINT4 *in) {
in += 9 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 24 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 24 ) ;
}
static UINT4
@@ -1371,9 +1379,9 @@ access_24_13 (const UINT4 *in) {
UINT4 out;
in += 9 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 24 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 24 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 24 - 16 );
return out;
}
@@ -1382,32 +1390,32 @@ access_24_14 (const UINT4 *in) {
UINT4 out;
in += 10 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 24 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 24 - 8 );
return out;
}
static UINT4
access_24_15 (const UINT4 *in) {
in += 11 * WORD_INCR;
- return ( (*in) >> 8 ) % (1U << 24 ) ;
+ return ( CONVERT(*in) >> 8 ) % (1U << 24 ) ;
}
static UINT4
access_26_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 26 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 26 ) ;
}
static UINT4
access_26_01 (const UINT4 *in) {
UINT4 out;
- out = ( (*in) >> 26 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 26 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 20 ))<<( 26 - 20 );
+ out |= (CONVERT(*in) % (1U<< 20 ))<<( 26 - 20 );
return out;
}
@@ -1416,9 +1424,9 @@ access_26_02 (const UINT4 *in) {
UINT4 out;
in += 1 * WORD_INCR;
- out = ( (*in) >> 20 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 20 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 14 ))<<( 26 - 14 );
+ out |= (CONVERT(*in) % (1U<< 14 ))<<( 26 - 14 );
return out;
}
@@ -1427,9 +1435,9 @@ access_26_03 (const UINT4 *in) {
UINT4 out;
in += 2 * WORD_INCR;
- out = ( (*in) >> 14 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 14 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 26 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 26 - 8 );
return out;
}
@@ -1438,16 +1446,16 @@ access_26_04 (const UINT4 *in) {
UINT4 out;
in += 3 * WORD_INCR;
- out = ( (*in) >> 8 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 8 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 2 ))<<( 26 - 2 );
+ out |= (CONVERT(*in) % (1U<< 2 ))<<( 26 - 2 );
return out;
}
static UINT4
access_26_05 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return ( (*in) >> 2 ) % (1U << 26 ) ;
+ return ( CONVERT(*in) >> 2 ) % (1U << 26 ) ;
}
static UINT4
@@ -1455,9 +1463,9 @@ access_26_06 (const UINT4 *in) {
UINT4 out;
in += 4 * WORD_INCR;
- out = ( (*in) >> 28 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 22 ))<<( 26 - 22 );
+ out |= (CONVERT(*in) % (1U<< 22 ))<<( 26 - 22 );
return out;
}
@@ -1466,9 +1474,9 @@ access_26_07 (const UINT4 *in) {
UINT4 out;
in += 5 * WORD_INCR;
- out = ( (*in) >> 22 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 22 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 26 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 26 - 16 );
return out;
}
@@ -1477,9 +1485,9 @@ access_26_08 (const UINT4 *in) {
UINT4 out;
in += 6 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 10 ))<<( 26 - 10 );
+ out |= (CONVERT(*in) % (1U<< 10 ))<<( 26 - 10 );
return out;
}
@@ -1488,16 +1496,16 @@ access_26_09 (const UINT4 *in) {
UINT4 out;
in += 7 * WORD_INCR;
- out = ( (*in) >> 10 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 10 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 26 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 26 - 4 );
return out;
}
static UINT4
access_26_10 (const UINT4 *in) {
in += 8 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 26 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 26 ) ;
}
static UINT4
@@ -1505,9 +1513,9 @@ access_26_11 (const UINT4 *in) {
UINT4 out;
in += 8 * WORD_INCR;
- out = ( (*in) >> 30 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 30 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 24 ))<<( 26 - 24 );
+ out |= (CONVERT(*in) % (1U<< 24 ))<<( 26 - 24 );
return out;
}
@@ -1516,9 +1524,9 @@ access_26_12 (const UINT4 *in) {
UINT4 out;
in += 9 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 18 ))<<( 26 - 18 );
+ out |= (CONVERT(*in) % (1U<< 18 ))<<( 26 - 18 );
return out;
}
@@ -1527,9 +1535,9 @@ access_26_13 (const UINT4 *in) {
UINT4 out;
in += 10 * WORD_INCR;
- out = ( (*in) >> 18 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 18 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 12 ))<<( 26 - 12 );
+ out |= (CONVERT(*in) % (1U<< 12 ))<<( 26 - 12 );
return out;
}
@@ -1538,31 +1546,31 @@ access_26_14 (const UINT4 *in) {
UINT4 out;
in += 11 * WORD_INCR;
- out = ( (*in) >> 12 ) % (1U << 26 ) ;
+ out = ( CONVERT(*in) >> 12 ) % (1U << 26 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 6 ))<<( 26 - 6 );
+ out |= (CONVERT(*in) % (1U<< 6 ))<<( 26 - 6 );
return out;
}
static UINT4
access_26_15 (const UINT4 *in) {
in += 12 * WORD_INCR;
- return ( (*in) >> 6 ) % (1U << 26 ) ;
+ return ( CONVERT(*in) >> 6 ) % (1U << 26 ) ;
}
static UINT4
access_28_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 28 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 28 ) ;
}
static UINT4
access_28_01 (const UINT4 *in) {
UINT4 out;
- out = ( (*in) >> 28 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 24 ))<<( 28 - 24 );
+ out |= (CONVERT(*in) % (1U<< 24 ))<<( 28 - 24 );
return out;
}
@@ -1571,9 +1579,9 @@ access_28_02 (const UINT4 *in) {
UINT4 out;
in += 1 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 20 ))<<( 28 - 20 );
+ out |= (CONVERT(*in) % (1U<< 20 ))<<( 28 - 20 );
return out;
}
@@ -1582,9 +1590,9 @@ access_28_03 (const UINT4 *in) {
UINT4 out;
in += 2 * WORD_INCR;
- out = ( (*in) >> 20 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 20 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 28 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 28 - 16 );
return out;
}
@@ -1593,9 +1601,9 @@ access_28_04 (const UINT4 *in) {
UINT4 out;
in += 3 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 12 ))<<( 28 - 12 );
+ out |= (CONVERT(*in) % (1U<< 12 ))<<( 28 - 12 );
return out;
}
@@ -1604,9 +1612,9 @@ access_28_05 (const UINT4 *in) {
UINT4 out;
in += 4 * WORD_INCR;
- out = ( (*in) >> 12 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 12 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 28 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 28 - 8 );
return out;
}
@@ -1615,22 +1623,22 @@ access_28_06 (const UINT4 *in) {
UINT4 out;
in += 5 * WORD_INCR;
- out = ( (*in) >> 8 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 8 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 28 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 28 - 4 );
return out;
}
static UINT4
access_28_07 (const UINT4 *in) {
in += 6 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 28 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 28 ) ;
}
static UINT4
access_28_08 (const UINT4 *in) {
in += 7 * WORD_INCR;
- return ( (*in) >> 0 ) % (1U << 28 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 28 ) ;
}
static UINT4
@@ -1638,9 +1646,9 @@ access_28_09 (const UINT4 *in) {
UINT4 out;
in += 7 * WORD_INCR;
- out = ( (*in) >> 28 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 24 ))<<( 28 - 24 );
+ out |= (CONVERT(*in) % (1U<< 24 ))<<( 28 - 24 );
return out;
}
@@ -1649,9 +1657,9 @@ access_28_10 (const UINT4 *in) {
UINT4 out;
in += 8 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 20 ))<<( 28 - 20 );
+ out |= (CONVERT(*in) % (1U<< 20 ))<<( 28 - 20 );
return out;
}
@@ -1660,9 +1668,9 @@ access_28_11 (const UINT4 *in) {
UINT4 out;
in += 9 * WORD_INCR;
- out = ( (*in) >> 20 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 20 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 28 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 28 - 16 );
return out;
}
@@ -1671,9 +1679,9 @@ access_28_12 (const UINT4 *in) {
UINT4 out;
in += 10 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 12 ))<<( 28 - 12 );
+ out |= (CONVERT(*in) % (1U<< 12 ))<<( 28 - 12 );
return out;
}
@@ -1682,9 +1690,9 @@ access_28_13 (const UINT4 *in) {
UINT4 out;
in += 11 * WORD_INCR;
- out = ( (*in) >> 12 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 12 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 28 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 28 - 8 );
return out;
}
@@ -1693,31 +1701,31 @@ access_28_14 (const UINT4 *in) {
UINT4 out;
in += 12 * WORD_INCR;
- out = ( (*in) >> 8 ) % (1U << 28 ) ;
+ out = ( CONVERT(*in) >> 8 ) % (1U << 28 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 28 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 28 - 4 );
return out;
}
static UINT4
access_28_15 (const UINT4 *in) {
in += 13 * WORD_INCR;
- return ( (*in) >> 4 ) % (1U << 28 ) ;
+ return ( CONVERT(*in) >> 4 ) % (1U << 28 ) ;
}
static UINT4
access_30_00 (const UINT4 *in) {
- return ( (*in) >> 0 ) % (1U << 30 ) ;
+ return ( CONVERT(*in) >> 0 ) % (1U << 30 ) ;
}
static UINT4
access_30_01 (const UINT4 *in) {
UINT4 out;
- out = ( (*in) >> 30 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 30 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 28 ))<<( 30 - 28 );
+ out |= (CONVERT(*in) % (1U<< 28 ))<<( 30 - 28 );
return out;
}
@@ -1726,9 +1734,9 @@ access_30_02 (const UINT4 *in) {
UINT4 out;
in += 1 * WORD_INCR;
- out = ( (*in) >> 28 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 28 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 26 ))<<( 30 - 26 );
+ out |= (CONVERT(*in) % (1U<< 26 ))<<( 30 - 26 );
return out;
}
@@ -1737,9 +1745,9 @@ access_30_03 (const UINT4 *in) {
UINT4 out;
in += 2 * WORD_INCR;
- out = ( (*in) >> 26 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 26 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 24 ))<<( 30 - 24 );
+ out |= (CONVERT(*in) % (1U<< 24 ))<<( 30 - 24 );
return out;
}
@@ -1748,9 +1756,9 @@ access_30_04 (const UINT4 *in) {
UINT4 out;
in += 3 * WORD_INCR;
- out = ( (*in) >> 24 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 24 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 22 ))<<( 30 - 22 );
+ out |= (CONVERT(*in) % (1U<< 22 ))<<( 30 - 22 );
return out;
}
@@ -1759,9 +1767,9 @@ access_30_05 (const UINT4 *in) {
UINT4 out;
in += 4 * WORD_INCR;
- out = ( (*in) >> 22 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 22 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 20 ))<<( 30 - 20 );
+ out |= (CONVERT(*in) % (1U<< 20 ))<<( 30 - 20 );
return out;
}
@@ -1770,9 +1778,9 @@ access_30_06 (const UINT4 *in) {
UINT4 out;
in += 5 * WORD_INCR;
- out = ( (*in) >> 20 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 20 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 18 ))<<( 30 - 18 );
+ out |= (CONVERT(*in) % (1U<< 18 ))<<( 30 - 18 );
return out;
}
@@ -1781,9 +1789,9 @@ access_30_07 (const UINT4 *in) {
UINT4 out;
in += 6 * WORD_INCR;
- out = ( (*in) >> 18 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 18 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 16 ))<<( 30 - 16 );
+ out |= (CONVERT(*in) % (1U<< 16 ))<<( 30 - 16 );
return out;
}
@@ -1792,9 +1800,9 @@ access_30_08 (const UINT4 *in) {
UINT4 out;
in += 7 * WORD_INCR;
- out = ( (*in) >> 16 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 16 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 14 ))<<( 30 - 14 );
+ out |= (CONVERT(*in) % (1U<< 14 ))<<( 30 - 14 );
return out;
}
@@ -1803,9 +1811,9 @@ access_30_09 (const UINT4 *in) {
UINT4 out;
in += 8 * WORD_INCR;
- out = ( (*in) >> 14 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 14 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 12 ))<<( 30 - 12 );
+ out |= (CONVERT(*in) % (1U<< 12 ))<<( 30 - 12 );
return out;
}
@@ -1814,9 +1822,9 @@ access_30_10 (const UINT4 *in) {
UINT4 out;
in += 9 * WORD_INCR;
- out = ( (*in) >> 12 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 12 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 10 ))<<( 30 - 10 );
+ out |= (CONVERT(*in) % (1U<< 10 ))<<( 30 - 10 );
return out;
}
@@ -1825,9 +1833,9 @@ access_30_11 (const UINT4 *in) {
UINT4 out;
in += 10 * WORD_INCR;
- out = ( (*in) >> 10 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 10 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 8 ))<<( 30 - 8 );
+ out |= (CONVERT(*in) % (1U<< 8 ))<<( 30 - 8 );
return out;
}
@@ -1836,9 +1844,9 @@ access_30_12 (const UINT4 *in) {
UINT4 out;
in += 11 * WORD_INCR;
- out = ( (*in) >> 8 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 8 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 6 ))<<( 30 - 6 );
+ out |= (CONVERT(*in) % (1U<< 6 ))<<( 30 - 6 );
return out;
}
@@ -1847,9 +1855,9 @@ access_30_13 (const UINT4 *in) {
UINT4 out;
in += 12 * WORD_INCR;
- out = ( (*in) >> 6 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 6 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 4 ))<<( 30 - 4 );
+ out |= (CONVERT(*in) % (1U<< 4 ))<<( 30 - 4 );
return out;
}
@@ -1858,112 +1866,112 @@ access_30_14 (const UINT4 *in) {
UINT4 out;
in += 13 * WORD_INCR;
- out = ( (*in) >> 4 ) % (1U << 30 ) ;
+ out = ( CONVERT(*in) >> 4 ) % (1U << 30 ) ;
in += 1 * WORD_INCR;
- out |= ((*in) % (1U<< 2 ))<<( 30 - 2 );
+ out |= (CONVERT(*in) % (1U<< 2 ))<<( 30 - 2 );
return out;
}
static UINT4
access_30_15 (const UINT4 *in) {
in += 14 * WORD_INCR;
- return ( (*in) >> 2 ) % (1U << 30 ) ;
+ return ( CONVERT(*in) >> 2 ) % (1U << 30 ) ;
}
static UINT4
access_32_00 (const UINT4 *in) {
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_01 (const UINT4 *in) {
in += 1 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_02 (const UINT4 *in) {
in += 2 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_03 (const UINT4 *in) {
in += 3 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_04 (const UINT4 *in) {
in += 4 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_05 (const UINT4 *in) {
in += 5 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_06 (const UINT4 *in) {
in += 6 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_07 (const UINT4 *in) {
in += 7 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_08 (const UINT4 *in) {
in += 8 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_09 (const UINT4 *in) {
in += 9 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_10 (const UINT4 *in) {
in += 10 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_11 (const UINT4 *in) {
in += 11 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_12 (const UINT4 *in) {
in += 12 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_13 (const UINT4 *in) {
in += 13 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_14 (const UINT4 *in) {
in += 14 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
static UINT4
access_32_15 (const UINT4 *in) {
in += 15 * WORD_INCR;
- return *in;
+ return CONVERT(*in);
}
@@ -2074,9 +2082,16 @@ Bitpack64_access (UINT4 position, UINT4 *ptrs, UINT4 *comp) {
#endif
info = &(ptrs[position/BLOCKSIZE * DIRECT_METAINFO_SIZE]);
+
+#ifdef WORDS_BIGENDIAN
+ start = Bigendian_convert_uint(info[0]);
+ bitpack = (UINT4 *) &(comp[start*4]);
+ nwritten = Bigendian_convert_uint(info[1]) - start; /* In 128-bit registers */
+#else
start = info[0];
bitpack = (UINT4 *) &(comp[start*4]);
nwritten = info[1] - start; /* In 128-bit registers */
+#endif
remainder = position % BLOCKSIZE;
index = nwritten*16 + remainder % 16;
@@ -2109,9 +2124,16 @@ Bitpack64_access (UINT4 position, UINT4 *ptrs, UINT4 *comp) {
#endif
info = &(ptrs[position/BLOCKSIZE * DIRECT_METAINFO_SIZE]);
+
+#ifdef WORDS_BIGENDIAN
+ start = Bigendian_convert_uint(info[0]);
+ bitpack = (UINT4 *) &(comp[start*4]);
+ nwritten = Bigendian_convert_uint(info[1]) - start; /* In 128-bit registers */
+#else
start = info[0];
bitpack = (UINT4 *) &(comp[start*4]);
nwritten = info[1] - start; /* In 128-bit registers */
+#endif
remainder = position % BLOCKSIZE;
index = nwritten*16 + remainder/4;
diff --git a/src/bitpack64-read.c b/src/bitpack64-read.c
index 306697e..d611400 100644
--- a/src/bitpack64-read.c
+++ b/src/bitpack64-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-read.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: bitpack64-read.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -8,7 +8,9 @@ static char rcsid[] = "$Id: bitpack64-read.c 153955 2014-11-24 17:54:45Z twu $";
#include <stdio.h>
#include <stdlib.h>
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+#include "bigendian.h"
+#elif defined(HAVE_SSE2)
#include <emmintrin.h>
#endif
@@ -126,7 +128,19 @@ Bitpack64_read_setup () {
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+static void
+unpack_00 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ int i;
+
+ for (i = 0; i < BLOCKSIZE; i++) {
+ *out++ = 0;
+ }
+
+ return;
+}
+
+#else
static void
unpack_00 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i total = _mm_set1_epi32(0U);
@@ -184,21 +198,10 @@ unpack_00_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-#else
-static void
-unpack_00 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- int i;
-
- for (i = 0; i < BLOCKSIZE; i++) {
- *out++ = 0;
- }
-
- return;
-}
#endif
+
#ifdef ALLOW_ODD_PACKSIZES
static void
unpack_01 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
@@ -274,7 +277,100 @@ unpack_01 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_02 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 2 ) ;
+ out++;
+ }
+
+ return;
+}
+
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_02 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 2 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
static void
unpack_02_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -743,52 +839,6 @@ unpack_02_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
-
-static void
-unpack_02 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 2 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 4 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 6 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 8 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 10 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 12 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 14 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 18 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 2 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -875,7 +925,50 @@ unpack_03 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_04 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+  /* 4-bit unpacker compiled under WORDS_BIGENDIAN.  Column c (0..3) reads
+     the words in[c] and in[c+4]; every word goes through
+     Bigendian_convert_uint before it is split into eight nibbles, lowest
+     bits first.  64 values are written to out. */
+  unsigned int col, row, shift;
+
+  for (col = 0; col < 4; col++) {
+    for (row = 0; row < 2; row++) {
+      const UINT4 *word = in + col + 4*row;
+
+      for (shift = 0; shift < 32; shift += 4) {
+        *out++ = (Bigendian_convert_uint(*word) >> shift) & 0xFU;
+      }
+    }
+  }
+
+  return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_04 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+  /* Scalar 4-bit unpacker (variant selected when HAVE_SSE2 is not
+     defined).  Column c (0..3) reads the words in[c] and in[c+4] and
+     splits each into eight nibbles, lowest bits first, writing 64 values
+     to out. */
+  unsigned int col, row, shift;
+
+  for (col = 0; col < 4; col++) {
+    for (row = 0; row < 2; row++) {
+      const UINT4 *word = in + col + 4*row;
+
+      for (shift = 0; shift < 32; shift += 4) {
+        *out++ = (*word >> shift) & 0xFU;
+      }
+    }
+  }
+
+  return;
+}
+
+#else
static void
unpack_04_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -1343,26 +1436,6 @@ unpack_04_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
-static void
-unpack_04 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- UINT4 outer, inwordpointer;
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- for (outer = 0; outer < 2 ; outer++) {
- for (inwordpointer = 0; inwordpointer < 32; inwordpointer += 4) {
- *(out++) = ( (*in) >> inwordpointer ) % (1U << 4 ) ;
- }
- in += 4;
- }
- }
-
- return;
-}
#endif
@@ -1455,29 +1528,129 @@ unpack_05 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_06_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask6 = _mm_set1_epi32(63U);
-
- OutReg = _mm_and_si128( InReg , mask6);
- _mm_store_si128(out++, OutReg);
+unpack_06 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ /* 6-bit unpacker compiled under WORDS_BIGENDIAN.  For each of the four
+ columns, sixteen values are read from the words in[column],
+ in[column+4] and in[column+8]; every word is passed through
+ Bigendian_convert_uint before shifting.  64 values are written to out. */
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,6) , mask6);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,12) , mask6);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 6 ) ;
+ out++;
+ /* Only 2 bits are left in this word; the upper 4 bits of the value
+ come from the low end of the column's next word (stride 4). */
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 6 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 6 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 6 ) ;
+ out++;
+ /* 4 bits left here; the remaining 2 bits come from the next word. */
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 6 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 6 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 6 ) ;
+ out++;
+ }
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,18) , mask6);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ return;
+}
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,24) , mask6);
+#elif !defined(HAVE_SSE2)
+static void
+unpack_06 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+  /* Scalar 6-bit unpacker (variant selected when HAVE_SSE2 is not
+     defined).  For each of the four columns, sixteen 6-bit fields are
+     read back-to-back from the words in[column], in[column+4] and
+     in[column+8].  A field that starts near the top of one word takes
+     its remaining high bits from the low end of the next word. */
+  const unsigned int width = 6;
+  const unsigned int mask = (1U << width) - 1U;
+  unsigned int col, k;
+
+  for (col = 0; col < 4; col++) {
+    const UINT4 *word = in + col;
+    unsigned int shift = 0;
+
+    for (k = 0; k < 16; k++) {
+      UINT4 value = (*word >> shift) & mask;
+
+      shift += width;
+      if (shift >= 32 && k < 15) {
+        /* Field reached the end of this word: move to the column's next
+           word and, if the field was cut short, append its high bits. */
+        word += 4;
+        shift -= 32;
+        if (shift > 0) {
+          value |= (*word & ((1U << shift) - 1U)) << (width - shift);
+        }
+      }
+      *out++ = value;
+    }
+  }
+
+  return;
+}
+
+#else
+static void
+unpack_06_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask6 = _mm_set1_epi32(63U);
+
+ OutReg = _mm_and_si128( InReg , mask6);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,6) , mask6);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,12) , mask6);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,18) , mask6);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,24) , mask6);
/* total = _mm_add_epi32(total, OutReg); */
_mm_store_si128(out++, OutReg);
@@ -1962,55 +2135,6 @@ unpack_06_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
-static void
-unpack_06 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 6 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 12 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 18 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 6 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 6 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 10 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 6 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 6 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 8 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 14 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 6 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -2111,7 +2235,49 @@ unpack_07 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_08 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+  /* 8-bit unpacker compiled under WORDS_BIGENDIAN.  Column c (0..3) reads
+     the words in[c], in[c+4], in[c+8] and in[c+12]; every word goes
+     through Bigendian_convert_uint before it is split into four bytes,
+     lowest bits first.  64 values are written to out. */
+  unsigned int col, row, shift;
+
+  for (col = 0; col < 4; col++) {
+    for (row = 0; row < 4; row++) {
+      const UINT4 *word = in + col + 4*row;
+
+      for (shift = 0; shift < 32; shift += 8) {
+        *out++ = (Bigendian_convert_uint(*word) >> shift) & 0xFFU;
+      }
+    }
+  }
+
+  return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_08 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+  /* Scalar 8-bit unpacker (variant selected when HAVE_SSE2 is not
+     defined).  Column c (0..3) reads the words in[c], in[c+4], in[c+8]
+     and in[c+12] and splits each into four bytes, lowest bits first,
+     writing 64 values to out. */
+  unsigned int col, row, shift;
+
+  for (col = 0; col < 4; col++) {
+    for (row = 0; row < 4; row++) {
+      const UINT4 *word = in + col + 4*row;
+
+      for (shift = 0; shift < 32; shift += 8) {
+        *out++ = (*word >> shift) & 0xFFU;
+      }
+    }
+  }
+
+  return;
+}
+
+#else
static void
unpack_08_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -2599,26 +2765,6 @@ unpack_08_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
}
-#else
-static void
-unpack_08 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- UINT4 outer, inwordpointer;
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- for (outer = 0; outer < 4; outer++) {
- for (inwordpointer = 0; inwordpointer < 32; inwordpointer += 8) {
- *(out++) = ( (*in) >> inwordpointer ) % (1U << 8 ) ;
- }
- in += 4;
- }
- }
-
- return;
-}
#endif
@@ -2727,53 +2873,161 @@ unpack_09 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_10_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask10 = _mm_set1_epi32(1023U);
-
- OutReg = _mm_and_si128( InReg , mask10);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,10) , mask10);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,20) , mask10);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+unpack_10 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ /* 10-bit unpacker compiled under WORDS_BIGENDIAN.  For each of the four
+ columns, sixteen values are read from the five words in[column],
+ in[column+4], ..., in[column+16]; every word is passed through
+ Bigendian_convert_uint before shifting.  64 values are written to out. */
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,30) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask8), 10-8));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask10));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 10 ) ;
+ out++;
+ /* 2 bits left in this word; the upper 8 bits come from the next word. */
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 10 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 10 ) ;
+ out++;
+ /* 4 bits left in this word; the upper 6 bits come from the next word. */
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 6 ))<<( 10 - 6 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 10 ) ;
+ out++;
+ /* 6 bits left in this word; the upper 4 bits come from the next word. */
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 10 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 10 ) ;
+ out++;
+ /* 8 bits left in this word; the upper 2 bits come from the next word. */
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 10 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 10 ) ;
+ out++;
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,8) , mask10);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ }
+ return;
+}
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,18) , mask10);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+#elif !defined(HAVE_SSE2)
+static void
+unpack_10 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ /* Scalar 10-bit unpacker (variant selected when HAVE_SSE2 is not
+ defined).  For each of the four columns, sixteen values are read from
+ the five words in[column], in[column+4], ..., in[column+16], lowest
+ bits first.  64 values are written to out. */
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,28) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask6), 10-6));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask10));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( (*in) >> 0 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 10 ) ;
+ out++;
+ /* 2 bits left in this word; the upper 8 bits come from the next word. */
+ *out = ( (*in) >> 30 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 10 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 10 ) ;
+ out++;
+ /* 4 bits left in this word; the upper 6 bits come from the next word. */
+ *out = ( (*in) >> 28 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 6 ))<<( 10 - 6 );
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 10 ) ;
+ out++;
+ /* 6 bits left in this word; the upper 4 bits come from the next word. */
+ *out = ( (*in) >> 26 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 10 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 10 ) ;
+ out++;
+ /* 8 bits left in this word; the upper 2 bits come from the next word. */
+ *out = ( (*in) >> 24 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 10 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 10 ) ;
+ out++;
+
+ }
+ return;
+}
+
+#else
+static void
+unpack_10_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask10 = _mm_set1_epi32(1023U);
+
+ OutReg = _mm_and_si128( InReg , mask10);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,10) , mask10);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,20) , mask10);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,30) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask8), 10-8));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask10));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,8) , mask10);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,18) , mask10);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,28) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask6), 10-6));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 6), mask10));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
OutReg = _mm_and_si128( _mm_srli_epi32(InReg,6) , mask10);
/* total = _mm_add_epi32(total, OutReg); */
@@ -3274,59 +3528,6 @@ unpack_10_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
-static void
-unpack_10 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 10 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 10 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 10 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 18 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 10 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 10 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 10 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 10 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 14 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 10 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 10 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 12 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 10 ) ;
- out++;
-
- }
- return;
-}
#endif
@@ -3440,7 +3641,117 @@ unpack_11 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_12 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+  /* 12-bit unpacker compiled under WORDS_BIGENDIAN.  For each of the four
+     columns, sixteen 12-bit fields are read back-to-back from the six
+     words in[column], in[column+4], ..., in[column+20]; every word goes
+     through Bigendian_convert_uint before it is shifted or masked.  A
+     field cut off by the end of a word takes its remaining high bits
+     from the low end of the next word. */
+  const unsigned int width = 12;
+  const unsigned int mask = (1U << width) - 1U;
+  unsigned int col, k;
+
+  for (col = 0; col < 4; col++) {
+    const UINT4 *word = in + col;
+    unsigned int shift = 0;
+
+    for (k = 0; k < 16; k++) {
+      UINT4 value = (Bigendian_convert_uint(*word) >> shift) & mask;
+
+      shift += width;
+      if (shift >= 32 && k < 15) {
+        /* Field reached the end of this word: move to the column's next
+           word and, if the field was cut short, append its high bits. */
+        word += 4;
+        shift -= 32;
+        if (shift > 0) {
+          value |= (Bigendian_convert_uint(*word) & ((1U << shift) - 1U)) << (width - shift);
+        }
+      }
+      *out++ = value;
+    }
+  }
+
+  return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_12 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+  /* Scalar 12-bit unpacker (variant selected when HAVE_SSE2 is not
+     defined).  For each of the four columns, sixteen 12-bit fields are
+     read back-to-back from the six words in[column], in[column+4], ...,
+     in[column+20].  A field cut off by the end of a word takes its
+     remaining high bits from the low end of the next word. */
+  const unsigned int width = 12;
+  const unsigned int mask = (1U << width) - 1U;
+  unsigned int col, k;
+
+  for (col = 0; col < 4; col++) {
+    const UINT4 *word = in + col;
+    unsigned int shift = 0;
+
+    for (k = 0; k < 16; k++) {
+      UINT4 value = (*word >> shift) & mask;
+
+      shift += width;
+      if (shift >= 32 && k < 15) {
+        /* Field reached the end of this word: move to the column's next
+           word and, if the field was cut short, append its high bits. */
+        word += 4;
+        shift -= 32;
+        if (shift > 0) {
+          value |= (*word & ((1U << shift) - 1U)) << (width - shift);
+        }
+      }
+      *out++ = value;
+    }
+  }
+
+  return;
+}
+
+#else
static void
unpack_12_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -3989,72 +4300,18 @@ unpack_12_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
+#endif
+
+
+#ifdef ALLOW_ODD_PACKSIZES
static void
-unpack_12 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
+unpack_13 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask13 = _mm_set1_epi32(8191U);
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 12 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 12 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 12 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 12 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 12 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 12 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 12 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 12 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 12 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 12 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 12 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 12 ) ;
- out++;
- }
-
- return;
-}
-#endif
-
-
-#ifdef ALLOW_ODD_PACKSIZES
-static void
-unpack_13 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask13 = _mm_set1_epi32(8191U);
-
- OutReg = _mm_and_si128( InReg , mask13);
- _mm_store_si128(out++, OutReg);
+ OutReg = _mm_and_si128( InReg , mask13);
+ _mm_store_si128(out++, OutReg);
OutReg = _mm_and_si128( _mm_srli_epi32(InReg,13) , mask13);
/* total = _mm_add_epi32(total, OutReg); */
@@ -4163,7 +4420,123 @@ unpack_13 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_14 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+  /* 14-bit unpacker compiled under WORDS_BIGENDIAN.  For each of the four
+     columns, sixteen 14-bit fields are read back-to-back from the seven
+     words in[column], in[column+4], ..., in[column+24]; every word goes
+     through Bigendian_convert_uint before it is shifted or masked.  A
+     field cut off by the end of a word takes its remaining high bits
+     from the low end of the next word. */
+  const unsigned int width = 14;
+  const unsigned int mask = (1U << width) - 1U;
+  unsigned int col, k;
+
+  for (col = 0; col < 4; col++) {
+    const UINT4 *word = in + col;
+    unsigned int shift = 0;
+
+    for (k = 0; k < 16; k++) {
+      UINT4 value = (Bigendian_convert_uint(*word) >> shift) & mask;
+
+      shift += width;
+      if (shift >= 32 && k < 15) {
+        /* Field reached the end of this word: move to the column's next
+           word and, if the field was cut short, append its high bits. */
+        word += 4;
+        shift -= 32;
+        if (shift > 0) {
+          value |= (Bigendian_convert_uint(*word) & ((1U << shift) - 1U)) << (width - shift);
+        }
+      }
+      *out++ = value;
+    }
+  }
+
+  return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_14 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+  /* Scalar 14-bit unpacker (variant selected when HAVE_SSE2 is not
+     defined).  For each of the four columns, sixteen 14-bit fields are
+     read back-to-back from the seven words in[column], in[column+4],
+     ..., in[column+24].  A field cut off by the end of a word takes its
+     remaining high bits from the low end of the next word. */
+  const unsigned int width = 14;
+  const unsigned int mask = (1U << width) - 1U;
+  unsigned int col, k;
+
+  for (col = 0; col < 4; col++) {
+    const UINT4 *word = in + col;
+    unsigned int shift = 0;
+
+    for (k = 0; k < 16; k++) {
+      UINT4 value = (*word >> shift) & mask;
+
+      shift += width;
+      if (shift >= 32 && k < 15) {
+        /* Field reached the end of this word: move to the column's next
+           word and, if the field was cut short, append its high bits. */
+        word += 4;
+        shift -= 32;
+        if (shift > 0) {
+          value |= (*word & ((1U << shift) - 1U)) << (width - shift);
+        }
+      }
+      *out++ = value;
+    }
+  }
+
+  return;
+}
+
+#else
static void
unpack_14_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -4747,63 +5120,6 @@ unpack_14_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
-static void
-unpack_14 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 14 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 10 ))<<( 14 - 10 );
- out++;
- *out = ( (*in) >> 10 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 14 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 14 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 14 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 14 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 14 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 18 ) % (1U << 14 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -4932,7 +5248,49 @@ unpack_15 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_16 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+  /* 16-bit unpacker compiled under WORDS_BIGENDIAN.  Column c (0..3) reads
+     the eight words in[c], in[c+4], ..., in[c+28]; every word goes
+     through Bigendian_convert_uint and is then split into its low and
+     high halves, low half first.  64 values are written to out. */
+  unsigned int col, row, shift;
+
+  for (col = 0; col < 4; col++) {
+    for (row = 0; row < 8; row++) {
+      const UINT4 *word = in + col + 4*row;
+
+      for (shift = 0; shift < 32; shift += 16) {
+        *out++ = (Bigendian_convert_uint(*word) >> shift) & 0xFFFFU;
+      }
+    }
+  }
+
+  return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_16 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+  /* Scalar 16-bit unpacker (variant selected when HAVE_SSE2 is not
+     defined).  Column c (0..3) reads the eight words in[c], in[c+4],
+     ..., in[c+28] and splits each into its low and high halves, low half
+     first, writing 64 values to out. */
+  unsigned int col, row, shift;
+
+  for (col = 0; col < 4; col++) {
+    for (row = 0; row < 8; row++) {
+      const UINT4 *word = in + col + 4*row;
+
+      for (shift = 0; shift < 32; shift += 16) {
+        *out++ = (*word >> shift) & 0xFFFFU;
+      }
+    }
+  }
+
+  return;
+}
+
+#else
static void
unpack_16_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -5437,27 +5795,7 @@ unpack_16_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
-static void
-unpack_16 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- UINT4 outer, inwordpointer;
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- for (outer = 0; outer < 8; outer++) {
- for(inwordpointer = 0; inwordpointer <32; inwordpointer += 16) {
- *(out++) = ( (*in) >> inwordpointer ) % (1U << 16 ) ;
- }
- in += 4;
- }
- }
-
- return;
-}
-#endif
+#endif
#ifdef ALLOW_ODD_PACKSIZES
@@ -5591,7 +5929,131 @@ unpack_17 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_18 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 18 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 18 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 18 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 18 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 18 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 6 ))<<( 18 - 6 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 10 ))<<( 18 - 10 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 14 ))<<( 18 - 14 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 18 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_18 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 18 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 18 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 18 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 18 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 18 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 6 ))<<( 18 - 6 );
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 10 ))<<( 18 - 10 );
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 14 ))<<( 18 - 14 );
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 18 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
static void
unpack_18_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -6212,67 +6674,6 @@ unpack_18_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
-static void
-unpack_18 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 18 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 18 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 18 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 18 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 18 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 18 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 18 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 10 ))<<( 18 - 10 );
- out++;
- *out = ( (*in) >> 10 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 14 ))<<( 18 - 14 );
- out++;
- *out = ( (*in) >> 14 ) % (1U << 18 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -6414,58 +6815,184 @@ unpack_19 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_20_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask20 = _mm_set1_epi32(1048575U);
-
- OutReg = _mm_and_si128( InReg , mask20);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,20) ;
- InReg = _mm_load_si128(++in);
-
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask8), 20-8));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask20));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,8) , mask20);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+unpack_20 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,28) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask16), 20-16));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask20));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 20 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 20 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 20 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 20 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 20 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 20 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 20 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 20 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 20 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 20 ) ;
+ out++;
+ }
- OutReg = _mm_srli_epi32(InReg,16) ;
- InReg = _mm_load_si128(++in);
+ return;
+}
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask4), 20-4));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask20));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+#elif !defined(HAVE_SSE2)
+static void
+unpack_20 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,4) , mask20);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_srli_epi32(InReg,24) ;
+ *out = ( (*in) >> 0 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 20 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 20 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 20 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 20 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 20 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 20 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 20 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 20 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 20 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 20 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_20_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask20 = _mm_set1_epi32(1048575U);
+
+ OutReg = _mm_and_si128( InReg , mask20);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,20) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask8), 20-8));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask20));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,8) , mask20);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,28) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask16), 20-16));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask20));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,16) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask4), 20-4));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask20));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,4) , mask20);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,24) ;
InReg = _mm_load_si128(++in);
#ifdef MULTIMASK
@@ -7037,68 +7564,6 @@ unpack_20_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
}
-#else
-static void
-unpack_20 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 20 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 20 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 20 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 20 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 20 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 20 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 20 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 20 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 20 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 20 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -7247,61 +7712,193 @@ unpack_21 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_22_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask22 = _mm_set1_epi32(4194303U);
-
- OutReg = _mm_and_si128( InReg , mask22);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,22) ;
- InReg = _mm_load_si128(++in);
-
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask12), 22-12));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask22));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,12) ;
- InReg = _mm_load_si128(++in);
+unpack_22 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask2), 22-2));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask22));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,2) , mask22);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 22 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 22 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 22 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 22 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 14 ))<<( 22 - 14 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 22 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 22 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 22 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 6 ))<<( 22 - 6 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 22 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 18 ))<<( 22 - 18 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 22 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 22 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 20 ))<<( 22 - 20 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 10 ))<<( 22 - 10 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 22 ) ;
+ out++;
+ }
- OutReg = _mm_srli_epi32(InReg,24) ;
- InReg = _mm_load_si128(++in);
+ return;
+}
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask14), 22-14));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask22));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+#elif !defined(HAVE_SSE2)
+static void
+unpack_22 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,14) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask4), 22-4));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask22));
-#endif
+ *out = ( (*in) >> 0 ) % (1U << 22 ) ;
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 22 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 22 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 22 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 14 ))<<( 22 - 14 );
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 22 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 22 ) ;
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 22 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 6 ))<<( 22 - 6 );
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 22 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 18 ))<<( 22 - 18 );
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 22 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 22 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 20 ))<<( 22 - 20 );
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 10 ))<<( 22 - 10 );
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 22 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_22_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask22 = _mm_set1_epi32(4194303U);
+
+ OutReg = _mm_and_si128( InReg , mask22);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,22) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask12), 22-12));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask22));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,12) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask2), 22-2));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask22));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,2) , mask22);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,24) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask14), 22-14));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask22));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,14) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask4), 22-4));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask22));
+#endif
/* total = _mm_add_epi32(total, OutReg); */
_mm_store_si128(out++, OutReg);
@@ -7903,71 +8500,6 @@ unpack_22_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
-static void
-unpack_22 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 22 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 22 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 22 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 22 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 14 ))<<( 22 - 14 );
- out++;
- *out = ( (*in) >> 14 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 22 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 22 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 22 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 22 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 22 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 18 ))<<( 22 - 18 );
- out++;
- *out = ( (*in) >> 18 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 22 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 22 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 20 ))<<( 22 - 20 );
- out++;
- *out = ( (*in) >> 20 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 10 ))<<( 22 - 10 );
- out++;
- *out = ( (*in) >> 10 ) % (1U << 22 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -8123,50 +8655,180 @@ unpack_23 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_24_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask24 = _mm_set1_epi32(16777215U);
-
- OutReg = _mm_and_si128( InReg , mask24);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,24) ;
- InReg = _mm_load_si128(++in);
-
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask16), 24-16));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask24));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,16) ;
- InReg = _mm_load_si128(++in);
+unpack_24 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask8), 24-8));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask24));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_srli_epi32(InReg,8) ;
- InReg = _mm_load_si128(++in);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 24 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 24 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 24 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 24 ) ;
+ out++;
+ }
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ return;
+}
- OutReg = _mm_and_si128( InReg , mask24);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+#elif !defined(HAVE_SSE2)
+static void
+unpack_24 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,24) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 24 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 24 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 24 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 24 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_24_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask24 = _mm_set1_epi32(16777215U);
+
+ OutReg = _mm_and_si128( InReg , mask24);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,24) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask16), 24-16));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask24));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,16) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask8), 24-8));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask24));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,8) ;
+ InReg = _mm_load_si128(++in);
+
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( InReg , mask24);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,24) ;
+ InReg = _mm_load_si128(++in);
#ifdef MULTIMASK
OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask16), 24-16));
@@ -8753,70 +9415,6 @@ unpack_24_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
}
-#else
-static void
-unpack_24 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 24 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 24 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 24 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 24 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 24 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 24 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 24 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 24 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -8979,59 +9577,199 @@ unpack_25 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_26_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask26 = _mm_set1_epi32(67108863U);
-
- OutReg = _mm_and_si128( InReg , mask26);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,26) ;
- InReg = _mm_load_si128(++in);
-
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask20), 26-20));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask26));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,20) ;
- InReg = _mm_load_si128(++in);
-
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask14), 26-14));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask26));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+unpack_26 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,14) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask8), 26-8));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask26));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 26 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 20 ))<<( 26 - 20 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 14 ))<<( 26 - 14 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 26 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 26 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 26 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 22 ))<<( 26 - 22 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 26 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 10 ))<<( 26 - 10 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 26 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 26 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 24 ))<<( 26 - 24 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 18 ))<<( 26 - 18 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 26 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 6 ))<<( 26 - 6 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 26 ) ;
+ out++;
+ }
+
+ return;
+}
- OutReg = _mm_srli_epi32(InReg,8) ;
- InReg = _mm_load_si128(++in);
+#elif !defined(HAVE_SSE2)
+static void
+unpack_26 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask2), 26-2));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask26));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 26 ) ;
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 20 ))<<( 26 - 20 );
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 14 ))<<( 26 - 14 );
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 26 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 26 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 26 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 22 ))<<( 26 - 22 );
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 26 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 10 ))<<( 26 - 10 );
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 26 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 26 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 24 ))<<( 26 - 24 );
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 18 ))<<( 26 - 18 );
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 26 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 6 ))<<( 26 - 6 );
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 26 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_26_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask26 = _mm_set1_epi32(67108863U);
+
+ OutReg = _mm_and_si128( InReg , mask26);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,26) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask20), 26-20));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask26));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,20) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask14), 26-14));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask26));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,14) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask8), 26-8));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask26));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,8) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask2), 26-2));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 2), mask26));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
OutReg = _mm_and_si128( _mm_srli_epi32(InReg,2) , mask26);
/* total = _mm_add_epi32(total, OutReg); */
@@ -9670,75 +10408,6 @@ unpack_26_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
-static void
-unpack_26 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 26 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 20 ))<<( 26 - 20 );
- out++;
- *out = ( (*in) >> 20 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 14 ))<<( 26 - 14 );
- out++;
- *out = ( (*in) >> 14 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 26 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 26 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 26 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 22 ))<<( 26 - 22 );
- out++;
- *out = ( (*in) >> 22 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 26 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 10 ))<<( 26 - 10 );
- out++;
- *out = ( (*in) >> 10 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 26 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 26 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 24 ))<<( 26 - 24 );
- out++;
- *out = ( (*in) >> 24 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 18 ))<<( 26 - 18 );
- out++;
- *out = ( (*in) >> 18 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 26 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 26 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 26 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -9908,42 +10577,184 @@ unpack_27 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_28_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask28 = _mm_set1_epi32(268435455U);
-
- OutReg = _mm_and_si128( InReg , mask28);
- _mm_store_si128(out++, OutReg);
+unpack_28 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,28) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask24), 28-24));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask28));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 28 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 24 ))<<( 28 - 24 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 20 ))<<( 28 - 20 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 28 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 28 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 28 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 28 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 28 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 28 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 24 ))<<( 28 - 24 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 20 ))<<( 28 - 20 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 28 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 28 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 28 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 28 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 28 ) ;
+ out++;
+ }
- OutReg = _mm_srli_epi32(InReg,24) ;
- InReg = _mm_load_si128(++in);
+ return;
+}
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask20), 28-20));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask28));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+#elif !defined(HAVE_SSE2)
+static void
+unpack_28 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,20) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
-#ifdef MULTIMASK
+ *out = ( (*in) >> 0 ) % (1U << 28 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 24 ))<<( 28 - 24 );
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 20 ))<<( 28 - 20 );
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 28 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 28 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 28 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 28 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 28 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) % (1U << 28 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 24 ))<<( 28 - 24 );
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 20 ))<<( 28 - 20 );
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 28 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 28 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 28 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 28 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 28 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_28_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask28 = _mm_set1_epi32(268435455U);
+
+ OutReg = _mm_and_si128( InReg , mask28);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,28) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask24), 28-24));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask28));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,24) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask20), 28-20));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask28));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,20) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask16), 28-16));
#else
OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask28));
@@ -10599,76 +11410,6 @@ unpack_28_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
-static void
-unpack_28 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 28 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 24 ))<<( 28 - 24 );
- out++;
- *out = ( (*in) >> 24 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 20 ))<<( 28 - 20 );
- out++;
- *out = ( (*in) >> 20 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 28 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 28 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 28 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 28 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 28 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 28 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 24 ))<<( 28 - 24 );
- out++;
- *out = ( (*in) >> 24 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 20 ))<<( 28 - 20 );
- out++;
- *out = ( (*in) >> 20 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 28 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 28 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 28 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 28 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 28 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -10845,57 +11586,205 @@ unpack_29 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_30_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask30 = _mm_set1_epi32(1073741823U);
-
- OutReg = _mm_and_si128( InReg , mask30);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,30) ;
- InReg = _mm_load_si128(++in);
-
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask28), 30-28));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask30));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+unpack_30 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,28) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask26), 30-26));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask30));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 30 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 28 ))<<( 30 - 28 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 26 ))<<( 30 - 26 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 24 ))<<( 30 - 24 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 22 ))<<( 30 - 22 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 20 ))<<( 30 - 20 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 18 ))<<( 30 - 18 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 30 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 14 ))<<( 30 - 14 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 30 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 10 ))<<( 30 - 10 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 30 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 6 ))<<( 30 - 6 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 30 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 30 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 30 ) ;
+ out++;
+ }
- OutReg = _mm_srli_epi32(InReg,26) ;
- InReg = _mm_load_si128(++in);
+ return;
+}
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask24), 30-24));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask30));
-#endif
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+#elif !defined(HAVE_SSE2)
+static void
+unpack_30 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,24) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
-#ifdef MULTIMASK
- OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask22), 30-22));
-#else
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask30));
-#endif
+ *out = ( (*in) >> 0 ) % (1U << 30 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 28 ))<<( 30 - 28 );
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 26 ))<<( 30 - 26 );
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 24 ))<<( 30 - 24 );
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 22 ))<<( 30 - 22 );
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 20 ))<<( 30 - 20 );
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 18 ))<<( 30 - 18 );
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 30 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 14 ))<<( 30 - 14 );
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 30 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 10 ))<<( 30 - 10 );
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 30 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 6 ))<<( 30 - 6 );
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 30 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 30 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 30 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_30_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask30 = _mm_set1_epi32(1073741823U);
+
+ OutReg = _mm_and_si128( InReg , mask30);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,30) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask28), 30-28));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask30));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,28) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask26), 30-26));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask30));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,26) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask24), 30-24));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 24), mask30));
+#endif
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,24) ;
+ InReg = _mm_load_si128(++in);
+
+#ifdef MULTIMASK
+ OutReg = _mm_or_si128(OutReg, _mm_slli_epi32(_mm_and_si128(InReg, mask22), 30-22));
+#else
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 22), mask30));
+#endif
/* total = _mm_add_epi32(total, OutReg); */
_mm_store_si128(out++, OutReg);
@@ -11572,79 +12461,6 @@ unpack_30_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
}
-#else
-static void
-unpack_30 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 30 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 28 ))<<( 30 - 28 );
- out++;
- *out = ( (*in) >> 28 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 26 ))<<( 30 - 26 );
- out++;
- *out = ( (*in) >> 26 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 24 ))<<( 30 - 24 );
- out++;
- *out = ( (*in) >> 24 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 22 ))<<( 30 - 22 );
- out++;
- *out = ( (*in) >> 22 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 20 ))<<( 30 - 20 );
- out++;
- *out = ( (*in) >> 20 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 18 ))<<( 30 - 18 );
- out++;
- *out = ( (*in) >> 18 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 30 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 14 ))<<( 30 - 14 );
- out++;
- *out = ( (*in) >> 14 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 30 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 10 ))<<( 30 - 10 );
- out++;
- *out = ( (*in) >> 10 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 30 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 30 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 30 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 30 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 30 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -11829,35 +12645,157 @@ unpack_31 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_32_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i OutReg;
-
- OutReg = _mm_load_si128(in++);
- _mm_store_si128(out++, OutReg);
+unpack_32 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ out++;
+ }
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
+ return;
+}
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
+#elif !defined(HAVE_SSE2)
+static void
+unpack_32 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_load_si128(in++);
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_32_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i OutReg;
+
+ OutReg = _mm_load_si128(in++);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
/* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
_mm_store_si128(out++, OutReg);
@@ -12234,66 +13172,6 @@ unpack_32_rev_8 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
-#else
-static void
-unpack_32 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -12392,9 +13270,10 @@ unpack_32 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+
static void
-vertical_order_fwd (UINT4 *vertical, UINT4 *columnar) {
+vertical_order (UINT4 *vertical, UINT4 *columnar) {
vertical[0] = columnar[0]; /* remainder 1 */
vertical[4] = columnar[1]; /* remainder 5 */
@@ -12432,54 +13311,47 @@ vertical_order_fwd (UINT4 *vertical, UINT4 *columnar) {
vertical[27] = columnar[30]; /* remainder 28 */
vertical[31] = columnar[31]; /* remainder 32 */
- return;
-}
-
-static void
-vertical_order_rev (UINT4 *vertical, UINT4 *columnar) {
-
- vertical[0] = columnar[0]; /* remainder 63 */
- vertical[4] = columnar[1]; /* remainder 59 */
- vertical[8] = columnar[2]; /* remainder 55 */
- vertical[12] = columnar[3]; /* remainder 51 */
- vertical[16] = columnar[4]; /* remainder 47 */
- vertical[20] = columnar[5]; /* remainder 43 */
- vertical[24] = columnar[6]; /* remainder 39 */
- vertical[28] = columnar[7]; /* remainder 35 */
+ vertical[32] = columnar[32]; /* remainder 63 */
+ vertical[36] = columnar[33]; /* remainder 59 */
+ vertical[40] = columnar[34]; /* remainder 55 */
+ vertical[44] = columnar[35]; /* remainder 51 */
+ vertical[48] = columnar[36]; /* remainder 47 */
+ vertical[52] = columnar[37]; /* remainder 43 */
+ vertical[56] = columnar[38]; /* remainder 39 */
+ vertical[60] = columnar[39]; /* remainder 35 */
- vertical[1] = columnar[8]; /* remainder 62 */
- vertical[5] = columnar[9]; /* remainder 58 */
- vertical[9] = columnar[10]; /* remainder 54 */
- vertical[13] = columnar[11]; /* remainder 50 */
- vertical[17] = columnar[12]; /* remainder 46 */
- vertical[21] = columnar[13]; /* remainder 42 */
- vertical[25] = columnar[14]; /* remainder 38 */
- vertical[29] = columnar[15]; /* remainder 34 */
+ vertical[33] = columnar[40]; /* remainder 62 */
+ vertical[37] = columnar[41]; /* remainder 58 */
+ vertical[41] = columnar[42]; /* remainder 54 */
+ vertical[45] = columnar[43]; /* remainder 50 */
+ vertical[49] = columnar[44]; /* remainder 46 */
+ vertical[53] = columnar[45]; /* remainder 42 */
+ vertical[57] = columnar[46]; /* remainder 38 */
+ vertical[61] = columnar[47]; /* remainder 34 */
- vertical[2] = columnar[16]; /* remainder 61 */
- vertical[6] = columnar[17]; /* remainder 57 */
- vertical[10] = columnar[18]; /* remainder 53 */
- vertical[14] = columnar[19]; /* remainder 49 */
- vertical[18] = columnar[20]; /* remainder 45 */
- vertical[22] = columnar[21]; /* remainder 41 */
- vertical[26] = columnar[22]; /* remainder 37 */
- vertical[30] = columnar[23]; /* remainder 33 */
+ vertical[34] = columnar[48]; /* remainder 61 */
+ vertical[38] = columnar[49]; /* remainder 57 */
+ vertical[42] = columnar[50]; /* remainder 53 */
+ vertical[46] = columnar[51]; /* remainder 49 */
+ vertical[50] = columnar[52]; /* remainder 45 */
+ vertical[54] = columnar[53]; /* remainder 41 */
+ vertical[58] = columnar[54]; /* remainder 37 */
+ vertical[62] = columnar[55]; /* remainder 33 */
- vertical[3] = columnar[24]; /* remainder 60 */
- vertical[7] = columnar[25]; /* remainder 56 */
- vertical[11] = columnar[26]; /* remainder 52 */
- vertical[15] = columnar[27]; /* remainder 48 */
- vertical[19] = columnar[28]; /* remainder 44 */
- vertical[23] = columnar[29]; /* remainder 40 */
- vertical[27] = columnar[30]; /* remainder 36 */
- vertical[31] = columnar[31]; /* remainder 32 */
+ vertical[35] = columnar[56]; /* remainder 60 */
+ vertical[39] = columnar[57]; /* remainder 56 */
+ vertical[43] = columnar[58]; /* remainder 52 */
+ vertical[47] = columnar[59]; /* remainder 48 */
+ vertical[51] = columnar[60]; /* remainder 44 */
+ vertical[55] = columnar[61]; /* remainder 40 */
+ vertical[59] = columnar[62]; /* remainder 36 */
+ vertical[63] = columnar[63]; /* remainder 32 */
return;
}
-#if defined(HAVE_64_BIT) && (defined(UTILITYP) || defined(LARGE_GENOMES))
static void
-vertical_order_huge_fwd (UINT8 *vertical, UINT4 *columnar) {
+vertical_order_huge (UINT8 *vertical, UINT4 *columnar) {
vertical[0] = (UINT8) columnar[0]; /* remainder 1 */
vertical[4] = (UINT8) columnar[1]; /* remainder 5 */
@@ -12517,58 +13389,49 @@ vertical_order_huge_fwd (UINT8 *vertical, UINT4 *columnar) {
vertical[27] = (UINT8) columnar[30]; /* remainder 28 */
vertical[31] = (UINT8) columnar[31]; /* remainder 32 */
- return;
-}
-#endif
+ vertical[32] = (UINT8) columnar[32]; /* remainder 63 */
+ vertical[36] = (UINT8) columnar[33]; /* remainder 59 */
+ vertical[40] = (UINT8) columnar[34]; /* remainder 55 */
+ vertical[44] = (UINT8) columnar[35]; /* remainder 51 */
+ vertical[48] = (UINT8) columnar[36]; /* remainder 47 */
+ vertical[52] = (UINT8) columnar[37]; /* remainder 43 */
+ vertical[56] = (UINT8) columnar[38]; /* remainder 39 */
+ vertical[60] = (UINT8) columnar[39]; /* remainder 35 */
-#if defined(HAVE_64_BIT) && (defined(UTILITYP) || defined(LARGE_GENOMES))
-static void
-vertical_order_huge_rev (UINT8 *vertical, UINT4 *columnar) {
+ vertical[33] = (UINT8) columnar[40]; /* remainder 62 */
+ vertical[37] = (UINT8) columnar[41]; /* remainder 58 */
+ vertical[41] = (UINT8) columnar[42]; /* remainder 54 */
+ vertical[45] = (UINT8) columnar[43]; /* remainder 50 */
+ vertical[49] = (UINT8) columnar[44]; /* remainder 46 */
+ vertical[53] = (UINT8) columnar[45]; /* remainder 42 */
+ vertical[57] = (UINT8) columnar[46]; /* remainder 38 */
+ vertical[61] = (UINT8) columnar[47]; /* remainder 34 */
- vertical[0] = (UINT8) columnar[0]; /* remainder 63 */
- vertical[4] = (UINT8) columnar[1]; /* remainder 59 */
- vertical[8] = (UINT8) columnar[2]; /* remainder 55 */
- vertical[12] = (UINT8) columnar[3]; /* remainder 51 */
- vertical[16] = (UINT8) columnar[4]; /* remainder 47 */
- vertical[20] = (UINT8) columnar[5]; /* remainder 43 */
- vertical[24] = (UINT8) columnar[6]; /* remainder 39 */
- vertical[28] = (UINT8) columnar[7]; /* remainder 35 */
+ vertical[34] = (UINT8) columnar[48]; /* remainder 61 */
+ vertical[38] = (UINT8) columnar[49]; /* remainder 57 */
+ vertical[42] = (UINT8) columnar[50]; /* remainder 53 */
+ vertical[46] = (UINT8) columnar[51]; /* remainder 49 */
+ vertical[50] = (UINT8) columnar[52]; /* remainder 45 */
+ vertical[54] = (UINT8) columnar[53]; /* remainder 41 */
+ vertical[58] = (UINT8) columnar[54]; /* remainder 37 */
+ vertical[62] = (UINT8) columnar[55]; /* remainder 33 */
- vertical[1] = (UINT8) columnar[8]; /* remainder 62 */
- vertical[5] = (UINT8) columnar[9]; /* remainder 58 */
- vertical[9] = (UINT8) columnar[10]; /* remainder 54 */
- vertical[13] = (UINT8) columnar[11]; /* remainder 50 */
- vertical[17] = (UINT8) columnar[12]; /* remainder 46 */
- vertical[21] = (UINT8) columnar[13]; /* remainder 42 */
- vertical[25] = (UINT8) columnar[14]; /* remainder 38 */
- vertical[29] = (UINT8) columnar[15]; /* remainder 34 */
-
- vertical[2] = (UINT8) columnar[16]; /* remainder 61 */
- vertical[6] = (UINT8) columnar[17]; /* remainder 57 */
- vertical[10] = (UINT8) columnar[18]; /* remainder 53 */
- vertical[14] = (UINT8) columnar[19]; /* remainder 49 */
- vertical[18] = (UINT8) columnar[20]; /* remainder 45 */
- vertical[22] = (UINT8) columnar[21]; /* remainder 41 */
- vertical[26] = (UINT8) columnar[22]; /* remainder 37 */
- vertical[30] = (UINT8) columnar[23]; /* remainder 33 */
-
- vertical[3] = (UINT8) columnar[24]; /* remainder 60 */
- vertical[7] = (UINT8) columnar[25]; /* remainder 56 */
- vertical[11] = (UINT8) columnar[26]; /* remainder 52 */
- vertical[15] = (UINT8) columnar[27]; /* remainder 48 */
- vertical[19] = (UINT8) columnar[28]; /* remainder 44 */
- vertical[23] = (UINT8) columnar[29]; /* remainder 40 */
- vertical[27] = (UINT8) columnar[30]; /* remainder 36 */
- vertical[31] = (UINT8) columnar[31]; /* remainder 32 */
+ vertical[35] = (UINT8) columnar[56]; /* remainder 60 */
+ vertical[39] = (UINT8) columnar[57]; /* remainder 56 */
+ vertical[43] = (UINT8) columnar[58]; /* remainder 52 */
+ vertical[47] = (UINT8) columnar[59]; /* remainder 48 */
+ vertical[51] = (UINT8) columnar[60]; /* remainder 44 */
+ vertical[55] = (UINT8) columnar[61]; /* remainder 40 */
+ vertical[59] = (UINT8) columnar[62]; /* remainder 36 */
+ vertical[63] = (UINT8) columnar[63]; /* remainder 32 */
return;
}
-#endif
-#else
+#else
static void
-vertical_order (UINT4 *vertical, UINT4 *columnar) {
+vertical_order_fwd (UINT4 *vertical, UINT4 *columnar) {
vertical[0] = columnar[0]; /* remainder 1 */
vertical[4] = columnar[1]; /* remainder 5 */
@@ -12606,47 +13469,54 @@ vertical_order (UINT4 *vertical, UINT4 *columnar) {
vertical[27] = columnar[30]; /* remainder 28 */
vertical[31] = columnar[31]; /* remainder 32 */
- vertical[32] = columnar[32]; /* remainder 63 */
- vertical[36] = columnar[33]; /* remainder 59 */
- vertical[40] = columnar[34]; /* remainder 55 */
- vertical[44] = columnar[35]; /* remainder 51 */
- vertical[48] = columnar[36]; /* remainder 47 */
- vertical[52] = columnar[37]; /* remainder 43 */
- vertical[56] = columnar[38]; /* remainder 39 */
- vertical[60] = columnar[39]; /* remainder 35 */
+ return;
+}
- vertical[33] = columnar[40]; /* remainder 62 */
- vertical[37] = columnar[41]; /* remainder 58 */
- vertical[41] = columnar[42]; /* remainder 54 */
- vertical[45] = columnar[43]; /* remainder 50 */
- vertical[49] = columnar[44]; /* remainder 46 */
- vertical[53] = columnar[45]; /* remainder 42 */
- vertical[57] = columnar[46]; /* remainder 38 */
- vertical[61] = columnar[47]; /* remainder 34 */
+static void
+vertical_order_rev (UINT4 *vertical, UINT4 *columnar) {
- vertical[34] = columnar[48]; /* remainder 61 */
- vertical[38] = columnar[49]; /* remainder 57 */
- vertical[42] = columnar[50]; /* remainder 53 */
- vertical[46] = columnar[51]; /* remainder 49 */
- vertical[50] = columnar[52]; /* remainder 45 */
- vertical[54] = columnar[53]; /* remainder 41 */
- vertical[58] = columnar[54]; /* remainder 37 */
- vertical[62] = columnar[55]; /* remainder 33 */
+ vertical[0] = columnar[0]; /* remainder 63 */
+ vertical[4] = columnar[1]; /* remainder 59 */
+ vertical[8] = columnar[2]; /* remainder 55 */
+ vertical[12] = columnar[3]; /* remainder 51 */
+ vertical[16] = columnar[4]; /* remainder 47 */
+ vertical[20] = columnar[5]; /* remainder 43 */
+ vertical[24] = columnar[6]; /* remainder 39 */
+ vertical[28] = columnar[7]; /* remainder 35 */
- vertical[35] = columnar[56]; /* remainder 60 */
- vertical[39] = columnar[57]; /* remainder 56 */
- vertical[43] = columnar[58]; /* remainder 52 */
- vertical[47] = columnar[59]; /* remainder 48 */
- vertical[51] = columnar[60]; /* remainder 44 */
- vertical[55] = columnar[61]; /* remainder 40 */
- vertical[59] = columnar[62]; /* remainder 36 */
- vertical[63] = columnar[63]; /* remainder 32 */
+ vertical[1] = columnar[8]; /* remainder 62 */
+ vertical[5] = columnar[9]; /* remainder 58 */
+ vertical[9] = columnar[10]; /* remainder 54 */
+ vertical[13] = columnar[11]; /* remainder 50 */
+ vertical[17] = columnar[12]; /* remainder 46 */
+ vertical[21] = columnar[13]; /* remainder 42 */
+ vertical[25] = columnar[14]; /* remainder 38 */
+ vertical[29] = columnar[15]; /* remainder 34 */
+
+ vertical[2] = columnar[16]; /* remainder 61 */
+ vertical[6] = columnar[17]; /* remainder 57 */
+ vertical[10] = columnar[18]; /* remainder 53 */
+ vertical[14] = columnar[19]; /* remainder 49 */
+ vertical[18] = columnar[20]; /* remainder 45 */
+ vertical[22] = columnar[21]; /* remainder 41 */
+ vertical[26] = columnar[22]; /* remainder 37 */
+ vertical[30] = columnar[23]; /* remainder 33 */
+
+ vertical[3] = columnar[24]; /* remainder 60 */
+ vertical[7] = columnar[25]; /* remainder 56 */
+ vertical[11] = columnar[26]; /* remainder 52 */
+ vertical[15] = columnar[27]; /* remainder 48 */
+ vertical[19] = columnar[28]; /* remainder 44 */
+ vertical[23] = columnar[29]; /* remainder 40 */
+ vertical[27] = columnar[30]; /* remainder 36 */
+ vertical[31] = columnar[31]; /* remainder 32 */
return;
}
+#if defined(HAVE_64_BIT) && (defined(UTILITYP) || defined(LARGE_GENOMES))
static void
-vertical_order_huge (UINT8 *vertical, UINT4 *columnar) {
+vertical_order_huge_fwd (UINT8 *vertical, UINT4 *columnar) {
vertical[0] = (UINT8) columnar[0]; /* remainder 1 */
vertical[4] = (UINT8) columnar[1]; /* remainder 5 */
@@ -12684,53 +13554,61 @@ vertical_order_huge (UINT8 *vertical, UINT4 *columnar) {
vertical[27] = (UINT8) columnar[30]; /* remainder 28 */
vertical[31] = (UINT8) columnar[31]; /* remainder 32 */
- vertical[32] = (UINT8) columnar[32]; /* remainder 63 */
- vertical[36] = (UINT8) columnar[33]; /* remainder 59 */
- vertical[40] = (UINT8) columnar[34]; /* remainder 55 */
- vertical[44] = (UINT8) columnar[35]; /* remainder 51 */
- vertical[48] = (UINT8) columnar[36]; /* remainder 47 */
- vertical[52] = (UINT8) columnar[37]; /* remainder 43 */
- vertical[56] = (UINT8) columnar[38]; /* remainder 39 */
- vertical[60] = (UINT8) columnar[39]; /* remainder 35 */
+ return;
+}
+#endif
- vertical[33] = (UINT8) columnar[40]; /* remainder 62 */
- vertical[37] = (UINT8) columnar[41]; /* remainder 58 */
- vertical[41] = (UINT8) columnar[42]; /* remainder 54 */
- vertical[45] = (UINT8) columnar[43]; /* remainder 50 */
- vertical[49] = (UINT8) columnar[44]; /* remainder 46 */
- vertical[53] = (UINT8) columnar[45]; /* remainder 42 */
- vertical[57] = (UINT8) columnar[46]; /* remainder 38 */
- vertical[61] = (UINT8) columnar[47]; /* remainder 34 */
+#if defined(HAVE_64_BIT) && (defined(UTILITYP) || defined(LARGE_GENOMES))
+static void
+vertical_order_huge_rev (UINT8 *vertical, UINT4 *columnar) {
- vertical[34] = (UINT8) columnar[48]; /* remainder 61 */
- vertical[38] = (UINT8) columnar[49]; /* remainder 57 */
- vertical[42] = (UINT8) columnar[50]; /* remainder 53 */
- vertical[46] = (UINT8) columnar[51]; /* remainder 49 */
- vertical[50] = (UINT8) columnar[52]; /* remainder 45 */
- vertical[54] = (UINT8) columnar[53]; /* remainder 41 */
- vertical[58] = (UINT8) columnar[54]; /* remainder 37 */
- vertical[62] = (UINT8) columnar[55]; /* remainder 33 */
+ vertical[0] = (UINT8) columnar[0]; /* remainder 63 */
+ vertical[4] = (UINT8) columnar[1]; /* remainder 59 */
+ vertical[8] = (UINT8) columnar[2]; /* remainder 55 */
+ vertical[12] = (UINT8) columnar[3]; /* remainder 51 */
+ vertical[16] = (UINT8) columnar[4]; /* remainder 47 */
+ vertical[20] = (UINT8) columnar[5]; /* remainder 43 */
+ vertical[24] = (UINT8) columnar[6]; /* remainder 39 */
+ vertical[28] = (UINT8) columnar[7]; /* remainder 35 */
- vertical[35] = (UINT8) columnar[56]; /* remainder 60 */
- vertical[39] = (UINT8) columnar[57]; /* remainder 56 */
- vertical[43] = (UINT8) columnar[58]; /* remainder 52 */
- vertical[47] = (UINT8) columnar[59]; /* remainder 48 */
- vertical[51] = (UINT8) columnar[60]; /* remainder 44 */
- vertical[55] = (UINT8) columnar[61]; /* remainder 40 */
- vertical[59] = (UINT8) columnar[62]; /* remainder 36 */
- vertical[63] = (UINT8) columnar[63]; /* remainder 32 */
+ vertical[1] = (UINT8) columnar[8]; /* remainder 62 */
+ vertical[5] = (UINT8) columnar[9]; /* remainder 58 */
+ vertical[9] = (UINT8) columnar[10]; /* remainder 54 */
+ vertical[13] = (UINT8) columnar[11]; /* remainder 50 */
+ vertical[17] = (UINT8) columnar[12]; /* remainder 46 */
+ vertical[21] = (UINT8) columnar[13]; /* remainder 42 */
+ vertical[25] = (UINT8) columnar[14]; /* remainder 38 */
+ vertical[29] = (UINT8) columnar[15]; /* remainder 34 */
+
+ vertical[2] = (UINT8) columnar[16]; /* remainder 61 */
+ vertical[6] = (UINT8) columnar[17]; /* remainder 57 */
+ vertical[10] = (UINT8) columnar[18]; /* remainder 53 */
+ vertical[14] = (UINT8) columnar[19]; /* remainder 49 */
+ vertical[18] = (UINT8) columnar[20]; /* remainder 45 */
+ vertical[22] = (UINT8) columnar[21]; /* remainder 41 */
+ vertical[26] = (UINT8) columnar[22]; /* remainder 37 */
+ vertical[30] = (UINT8) columnar[23]; /* remainder 33 */
+
+ vertical[3] = (UINT8) columnar[24]; /* remainder 60 */
+ vertical[7] = (UINT8) columnar[25]; /* remainder 56 */
+ vertical[11] = (UINT8) columnar[26]; /* remainder 52 */
+ vertical[15] = (UINT8) columnar[27]; /* remainder 48 */
+ vertical[19] = (UINT8) columnar[28]; /* remainder 44 */
+ vertical[23] = (UINT8) columnar[29]; /* remainder 40 */
+ vertical[27] = (UINT8) columnar[30]; /* remainder 36 */
+ vertical[31] = (UINT8) columnar[31]; /* remainder 32 */
return;
}
-
#endif
+#endif
-#ifdef HAVE_SSE2
-typedef void (*Unpacker_T) (__m128i* __restrict__, const __m128i* __restrict__);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
typedef void (*Unpacker_T) (UINT4* __restrict__, const UINT4* __restrict__);
+#else
+typedef void (*Unpacker_T) (__m128i* __restrict__, const __m128i* __restrict__);
#endif
@@ -12745,8 +13623,20 @@ static Unpacker_T unpacker_table[33] =
unpack_21, unpack_22, unpack_23, unpack_24,
unpack_25, unpack_26, unpack_27, unpack_28,
unpack_29, unpack_30, unpack_31, unpack_32};
+
+#elif defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+static Unpacker_T unpacker_all_table[33] =
+ {unpack_00,
+ unpack_00, unpack_02, unpack_00, unpack_04,
+ unpack_00, unpack_06, unpack_00, unpack_08,
+ unpack_00, unpack_10, unpack_00, unpack_12,
+ unpack_00, unpack_14, unpack_00, unpack_16,
+ unpack_00, unpack_18, unpack_00, unpack_20,
+ unpack_00, unpack_22, unpack_00, unpack_24,
+ unpack_00, unpack_26, unpack_00, unpack_28,
+ unpack_00, unpack_30, unpack_00, unpack_32};
+
#else
-#ifdef HAVE_SSE2
static Unpacker_T unpacker_all_table[34] =
{unpack_00, unpack_00,
unpack_02_fwd, unpack_02_rev, unpack_04_fwd, unpack_04_rev,
@@ -12864,18 +13754,6 @@ static Unpacker_T unpacker_table[17][17] =
};
-#else
-static Unpacker_T unpacker_all_table[33] =
- {unpack_00,
- unpack_00, unpack_02, unpack_00, unpack_04,
- unpack_00, unpack_06, unpack_00, unpack_08,
- unpack_00, unpack_10, unpack_00, unpack_12,
- unpack_00, unpack_14, unpack_00, unpack_16,
- unpack_00, unpack_18, unpack_00, unpack_20,
- unpack_00, unpack_22, unpack_00, unpack_24,
- unpack_00, unpack_26, unpack_00, unpack_28,
- unpack_00, unpack_30, unpack_00, unpack_32};
-#endif
#endif
@@ -13786,7 +14664,11 @@ Bitpack64_read_one (Storedoligomer_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackco
Storedoligomer_T bmer;
UINT4 *info, nwritten, packsize_div2;
int delta, remainder, quarter_block, column, row;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ UINT4 ptr;
+ UINT4 diffs[BLOCKSIZE+1], *bitpack;
+ int k, i;
+#else
#ifdef BRANCH_FREE_ROW_SUM
__m128i diffs[3];
#else
@@ -13797,10 +14679,6 @@ Bitpack64_read_one (Storedoligomer_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackco
#endif
__m128i *bitpack;
UINT4 *_diffs;
-#else
- UINT4 ptr;
- UINT4 diffs[BLOCKSIZE+1], *bitpack;
- int k, i;
#endif
#ifdef DEBUG
UINT4 offsets[BLOCKSIZE+1];
@@ -13812,72 +14690,37 @@ Bitpack64_read_one (Storedoligomer_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackco
debug(printf("Entered Bitpack64_read_one with oligo %u => bmer %u\n",oligo,bmer));
+#if defined(WORDS_BIGENDIAN)
+ nwritten = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ packsize_div2 = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten);
+
+#elif !defined(HAVE_SSE2)
nwritten = info[0]; /* In 128-bit registers */
-#ifdef HAVE_SSE2
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
-#else
bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
-#endif
+ packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+#else
+ nwritten = info[0]; /* In 128-bit registers */
+ bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
/* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+#endif
remainder = oligo % BLOCKSIZE;
quarter_block = remainder / 16;
-#ifdef HAVE_SSE2
- _diffs = (UINT4 *) diffs; /* Assumes a dummy register in diffs[0] */
-
-#ifdef BRANCH_FREE_QTR_BLOCK
- psums[0] = psums[1] = info[1];
- psums[2] = psums[3] = info[DIFFERENTIAL_METAINFO_SIZE+1];
-
- delta = 31 - abs(remainder - 32);
- column = get_column(delta);
- row = get_row(delta);
- debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block,delta,column,row));
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
- (unpacker_table[packsize_div2][column*4 + quarter_block])(diffs,bitpack);
- return psums[quarter_block] + _diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4];
+ /* Unpack all 64 diffs for non-SIMD */
+ (unpacker_all_table[packsize_div2*2])(&(diffs[1]),bitpack);
+ if (remainder <= 16) {
+#ifdef WORDS_BIGENDIAN
+ ptr = Bigendian_convert_uint(/*offset0*/info[1]);
#else
-
- if (quarter_block <= 1) {
- delta = remainder - 1;
- column = get_column(delta);
- row = get_row(delta);
- debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block,delta,column,row));
-
- (unpacker_table[packsize_div2][column*4 + quarter_block])(diffs,bitpack);
-#ifdef BRANCH_FREE_ROW_SUM
- return info[1] + _diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4];
-#else
- return_sum_fwd(info[1],_diffs,row);
-#endif
-
- } else {
- delta = 63 - remainder;
- column = get_column(delta);
- row = get_row(delta);
- debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block,delta,column,row));
-
- (unpacker_table[packsize_div2][column*4 + quarter_block])(diffs,bitpack);
-#ifdef BRANCH_FREE_ROW_SUM
- return info[DIFFERENTIAL_METAINFO_SIZE+1] - _diffs[row+1] - _diffs[row+2] - _diffs[row+3] - _diffs[row+4];
-#else
- return_sum_rev(info[DIFFERENTIAL_METAINFO_SIZE+1],_diffs,row);
-#endif
- }
-
-#endif
-
-#else
-
- /* Unpack all 64 diffs for non-SIMD */
- (unpacker_all_table[packsize_div2*2])(&(diffs[1]),bitpack);
-
- if (remainder <= 16) {
- ptr = /*offset0*/info[1];
+ ptr = /*offset0*/info[1];
+#endif
delta = remainder - 1;
column = get_column(delta);
@@ -13890,7 +14733,11 @@ Bitpack64_read_one (Storedoligomer_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackco
}
} else if (remainder <= 32) {
+#ifdef WORDS_BIGENDIAN
+ ptr = Bigendian_convert_uint(/*offset0*/info[1]);
+#else
ptr = /*offset0*/info[1];
+#endif
delta = remainder - 1;
column = get_column(delta);
@@ -13908,7 +14755,11 @@ Bitpack64_read_one (Storedoligomer_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackco
}
} else if (remainder <= 48) {
+#ifdef WORDS_BIGENDIAN
+ ptr = Bigendian_convert_uint(/*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1]);
+#else
ptr = /*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1];
+#endif
delta = 63 - remainder;
column = get_column(delta);
@@ -13926,7 +14777,11 @@ Bitpack64_read_one (Storedoligomer_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackco
}
} else {
+#ifdef WORDS_BIGENDIAN
+ ptr = Bigendian_convert_uint(/*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1]);
+#else
ptr = /*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1];
+#endif
delta = 63 - remainder;
column = get_column(delta);
@@ -13941,7 +14796,52 @@ Bitpack64_read_one (Storedoligomer_T oligo, UINT4 *bitpackptrs, UINT4 *bitpackco
return ptr;
-#endif /* HAVE_SSE2 */
+#else /* littleendian and SSE2 */
+ _diffs = (UINT4 *) diffs; /* Assumes a dummy register in diffs[0] */
+
+#ifdef BRANCH_FREE_QTR_BLOCK
+ psums[0] = psums[1] = info[1];
+ psums[2] = psums[3] = info[DIFFERENTIAL_METAINFO_SIZE+1];
+
+ delta = 31 - abs(remainder - 32);
+ column = get_column(delta);
+ row = get_row(delta);
+ debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block,delta,column,row));
+
+ (unpacker_table[packsize_div2][column*4 + quarter_block])(diffs,bitpack);
+ return psums[quarter_block] + _diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4];
+
+#else
+
+ if (quarter_block <= 1) {
+ delta = remainder - 1;
+ column = get_column(delta);
+ row = get_row(delta);
+ debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block,delta,column,row));
+
+ (unpacker_table[packsize_div2][column*4 + quarter_block])(diffs,bitpack);
+#ifdef BRANCH_FREE_ROW_SUM
+ return info[1] + _diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4];
+#else
+ return_sum_fwd(info[1],_diffs,row);
+#endif
+
+ } else {
+ delta = 63 - remainder;
+ column = get_column(delta);
+ row = get_row(delta);
+ debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block,delta,column,row));
+
+ (unpacker_table[packsize_div2][column*4 + quarter_block])(diffs,bitpack);
+#ifdef BRANCH_FREE_ROW_SUM
+ return info[DIFFERENTIAL_METAINFO_SIZE+1] - _diffs[row+1] - _diffs[row+2] - _diffs[row+3] - _diffs[row+4];
+#else
+ return_sum_rev(info[DIFFERENTIAL_METAINFO_SIZE+1],_diffs,row);
+#endif
+ }
+
+#endif /* BRANCH_FREE_QTR_BLOCK */
+#endif /* littleendian and SSE2 */
}
@@ -13955,7 +14855,11 @@ Bitpack64_read_one_huge (Storedoligomer_T oligo, UINT4 *bitpackpages,
UINT4 *info, nwritten, packsize_div2;
UINT8 offset0, offset1;
int delta, remainder, quarter_block, column, row;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ UINT8 ptr;
+ UINT4 diffs[BLOCKSIZE+1], *bitpack;
+ int k;
+#else
#ifdef BRANCH_FREE_ROW_SUM
__m128i diffs[3];
#else
@@ -13966,10 +14870,6 @@ Bitpack64_read_one_huge (Storedoligomer_T oligo, UINT4 *bitpackpages,
#endif
__m128i *bitpack;
UINT4 *_diffs;
-#else
- UINT8 ptr;
- UINT4 diffs[BLOCKSIZE+1], *bitpack;
- int k;
#endif
int i;
@@ -13979,15 +14879,23 @@ Bitpack64_read_one_huge (Storedoligomer_T oligo, UINT4 *bitpackpages,
debug(printf("Entered Bitpack64_read_one_huge with oligo %u => bmer %u\n",oligo,bmer));
+#ifdef WORDS_BIGENDIAN
+ nwritten = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ packsize_div2 = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten);
+
+#elif !defined(HAVE_SSE2)
nwritten = info[0]; /* In 128-bit registers */
-#ifdef HAVE_SSE2
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
-#else
bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
-#endif
+ packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+#else
+ nwritten = info[0]; /* In 128-bit registers */
+ bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
/* packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2; */
packsize_div2 = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten);
+#endif
+
#ifdef DEBUG
printf("bitpack (for packsize %d):\n",packsize_div2*2);
@@ -14000,7 +14908,152 @@ Bitpack64_read_one_huge (Storedoligomer_T oligo, UINT4 *bitpackpages,
remainder = oligo % BLOCKSIZE;
quarter_block = remainder / 16;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+
+ /* Unpack all 64 diffs for non-SIMD */
+ (unpacker_all_table[packsize_div2*2])(&(diffs[1]),bitpack);
+
+ if ((remainder = oligo % BLOCKSIZE) == 0) {
+#ifdef WORDS_BIGENDIAN
+ ptr = Bigendian_convert_uint(/*offset0*/info[1]);
+#else
+ ptr = /*offset0*/info[1];
+#endif
+
+ } else if (remainder <= 16) {
+#ifdef WORDS_BIGENDIAN
+ ptr = Bigendian_convert_uint(/*offset0*/info[1]);
+ if (bitpackpages != NULL) {
+ pageptr = bitpackpages;
+ while (bmer+1 >= Bigendian_convert_uint(*pageptr)) {
+ ptr += POSITIONS_PAGE;
+ pageptr++;
+ }
+ }
+#else
+ ptr = /*offset0*/info[1];
+ if (bitpackpages != NULL) {
+ pageptr = bitpackpages;
+ while (bmer+1 >= *pageptr) {
+ ptr += POSITIONS_PAGE;
+ pageptr++;
+ }
+ }
+#endif
+
+ column = (remainder - 1) % 4; /* Goes from 0 to 3 */
+ row = (remainder - 1) / 4;
+ debug(printf("column %d, row %d\n",column,row));
+
+ for (k = column*2 + 1, i = 0; i <= row; k += BLOCKSIZE/4, i++) {
+ debug(printf("Adding diffs[%d] = %u\n",k,diffs[k]));
+ ptr += diffs[k];
+ }
+
+ } else if (remainder <= 32) {
+#ifdef WORDS_BIGENDIAN
+ ptr = Bigendian_convert_uint(/*offset0*/info[1]);
+ if (bitpackpages != NULL) {
+ pageptr = bitpackpages;
+ while (bmer+1 >= Bigendian_convert_uint(*pageptr)) {
+ ptr += POSITIONS_PAGE;
+ pageptr++;
+ }
+ }
+#else
+ ptr = /*offset0*/info[1];
+ if (bitpackpages != NULL) {
+ pageptr = bitpackpages;
+ while (bmer+1 >= *pageptr) {
+ ptr += POSITIONS_PAGE;
+ pageptr++;
+ }
+ }
+#endif
+
+ column = (remainder - 1) % 4; /* Goes from 0 to 3 */
+ row = (remainder - 1) / 4;
+ debug(printf("column %d, row %d\n",column,row));
+
+ for (k = column*2 + 1, i = 0; i < 4; k += BLOCKSIZE/4, i++) {
+ debug(printf("Adding diffs[%d] = %u\n",k,diffs[k]));
+ ptr += diffs[k];
+ }
+
+ for (k = column*2 + 2; i <= row; k += BLOCKSIZE/4, i++) {
+ debug(printf("Adding diffs[%d] = %u\n",k,diffs[k]));
+ ptr += diffs[k];
+ }
+
+ } else if (remainder <= 48) {
+#ifdef WORDS_BIGENDIAN
+ ptr = Bigendian_convert_uint(/*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1]);
+ if (bitpackpages != NULL) {
+ pageptr = bitpackpages;
+ while (bmer+1 >= Bigendian_convert_uint(*pageptr)) {
+ ptr += POSITIONS_PAGE;
+ pageptr++;
+ }
+ }
+#else
+ ptr = /*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1];
+ if (bitpackpages != NULL) {
+ pageptr = bitpackpages;
+ while (bmer+1 >= *pageptr) {
+ ptr += POSITIONS_PAGE;
+ pageptr++;
+ }
+ }
+#endif
+
+ column = (63 - remainder) % 4; /* Goes from 0 to 3. Assert remainder < 64 */
+ row = (63 - remainder) / 4;
+ debug(printf("column %d, row %d\n",column,row));
+
+ for (k = column*2 + 9, i = 0; i < 4; k += BLOCKSIZE/4, i++) {
+ debug(printf("Subtracting diffs[%d] = %u\n",k,diffs[k]));
+ ptr -= diffs[k];
+ }
+
+ for (k = column*2 + 10; i <= row; k += BLOCKSIZE/4, i++) {
+ debug(printf("Subtracting diffs[%d] = %u\n",k,diffs[k]));
+ ptr -= diffs[k];
+ }
+
+ } else {
+#ifdef WORDS_BIGENDIAN
+ ptr = Bigendian_convert_uint(/*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1]);
+ if (bitpackpages != NULL) {
+ pageptr = bitpackpages;
+ while (bmer+1 >= Bigendian_convert_uint(*pageptr)) {
+ ptr += POSITIONS_PAGE;
+ pageptr++;
+ }
+ }
+#else
+ ptr = /*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1];
+ if (bitpackpages != NULL) {
+ pageptr = bitpackpages;
+ while (bmer+1 >= *pageptr) {
+ ptr += POSITIONS_PAGE;
+ pageptr++;
+ }
+ }
+#endif
+
+ column = (63 - remainder) % 4; /* Goes from 0 to 3. Assert remainder < 64 */
+ row = (63 - remainder) / 4;
+ debug(printf("column %d, row %d\n",column,row));
+
+ for (k = column*2 + 9, i = 0; i <= row; k += BLOCKSIZE/4, i++) {
+ debug(printf("Subtracting diffs[%d] = %u\n",k,diffs[k]));
+ ptr -= diffs[k];
+ }
+ }
+
+ return ptr;
+
+#else /* littleendian and SSE2 */
_diffs = (UINT4 *) diffs; /* Assumes a dummy register in diffs[0] */
#ifdef BRANCH_FREE_QTR_BLOCK
@@ -14107,106 +15160,8 @@ Bitpack64_read_one_huge (Storedoligomer_T oligo, UINT4 *bitpackpages,
#endif
}
-#endif
-
-#else
-
- /* Unpack all 64 diffs for non-SIMD */
- (unpacker_all_table[packsize_div2*2])(&(diffs[1]),bitpack);
-
- if ((remainder = oligo % BLOCKSIZE) == 0) {
- ptr = /*offset0*/info[1];
-
- } else if (remainder <= 16) {
- ptr = /*offset0*/info[1];
- if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer+1 >= *pageptr) {
- ptr += POSITIONS_PAGE;
- pageptr++;
- }
- }
-
- column = (remainder - 1) % 4; /* Goes from 0 to 3 */
- row = (remainder - 1) / 4;
- debug(printf("column %d, row %d\n",column,row));
-
- for (k = column*2 + 1, i = 0; i <= row; k += BLOCKSIZE/4, i++) {
- debug(printf("Adding diffs[%d] = %u\n",k,diffs[k]));
- ptr += diffs[k];
- }
-
- } else if (remainder <= 32) {
- ptr = /*offset0*/info[1];
- if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer+1 >= *pageptr) {
- ptr += POSITIONS_PAGE;
- pageptr++;
- }
- }
-
- column = (remainder - 1) % 4; /* Goes from 0 to 3 */
- row = (remainder - 1) / 4;
- debug(printf("column %d, row %d\n",column,row));
-
- for (k = column*2 + 1, i = 0; i < 4; k += BLOCKSIZE/4, i++) {
- debug(printf("Adding diffs[%d] = %u\n",k,diffs[k]));
- ptr += diffs[k];
- }
-
- for (k = column*2 + 2; i <= row; k += BLOCKSIZE/4, i++) {
- debug(printf("Adding diffs[%d] = %u\n",k,diffs[k]));
- ptr += diffs[k];
- }
-
- } else if (remainder <= 48) {
- ptr = /*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1];
- if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer+1 >= *pageptr) {
- ptr += POSITIONS_PAGE;
- pageptr++;
- }
- }
-
- column = (63 - remainder) % 4; /* Goes from 0 to 3. Assert remainder < 64 */
- row = (63 - remainder) / 4;
- debug(printf("column %d, row %d\n",column,row));
-
- for (k = column*2 + 9, i = 0; i < 4; k += BLOCKSIZE/4, i++) {
- debug(printf("Subtracting diffs[%d] = %u\n",k,diffs[k]));
- ptr -= diffs[k];
- }
-
- for (k = column*2 + 10; i <= row; k += BLOCKSIZE/4, i++) {
- debug(printf("Subtracting diffs[%d] = %u\n",k,diffs[k]));
- ptr -= diffs[k];
- }
-
- } else {
- ptr = /*offset1*/info[DIFFERENTIAL_METAINFO_SIZE+1];
- if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- while (bmer+1 >= *pageptr) {
- ptr += POSITIONS_PAGE;
- pageptr++;
- }
- }
-
- column = (63 - remainder) % 4; /* Goes from 0 to 3. Assert remainder < 64 */
- row = (63 - remainder) / 4;
- debug(printf("column %d, row %d\n",column,row));
-
- for (k = column*2 + 9, i = 0; i <= row; k += BLOCKSIZE/4, i++) {
- debug(printf("Subtracting diffs[%d] = %u\n",k,diffs[k]));
- ptr -= diffs[k];
- }
- }
-
- return ptr;
-
-#endif
+#endif /* BRANCH_FREE_QTR_BLOCK */
+#endif /* littleendian and SSE2 */
}
@@ -14218,12 +15173,12 @@ Bitpack64_block_offsets (UINT4 *offsets, Storedoligomer_T oligo,
UINT4 *info, nwritten;
UINT4 offset0, offset1, temp;
int packsize, k;
-#ifdef HAVE_SSE2
- __m128i diffs[8], *bitpack;
- UINT4 *_diffs;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
int column, row;
UINT4 diffs[BLOCKSIZE], columnar[BLOCKSIZE], *bitpack, *vertical;
+#else
+ __m128i diffs[8], *bitpack;
+ UINT4 *_diffs;
#endif
#ifdef DEBUG
int i;
@@ -14231,52 +15186,36 @@ Bitpack64_block_offsets (UINT4 *offsets, Storedoligomer_T oligo,
info = &(bitpackptrs[oligo/BLOCKSIZE * DIFFERENTIAL_METAINFO_SIZE]);
+#ifdef WORDS_BIGENDIAN
+ nwritten = Bigendian_convert_uint(info[0]);
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ offset0 = Bigendian_convert_uint(info[1]);
+ offset1 = Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE+1]);
+ packsize = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten)*2;
+
+#elif !defined(HAVE_SSE2)
nwritten = info[0]; /* In 128-bit registers */
-#ifdef HAVE_SSE2
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
-#else
bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
-#endif
offset0 = info[1];
offset1 = info[DIFFERENTIAL_METAINFO_SIZE+1];
-
packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2;
-#ifdef DEBUG
- printf("oligo: %08X, nwritten %u, offset0 %u, offset1 %u, packsize %d\n",
- oligo,nwritten,offset0,offset1,packsize);
+#else
+ nwritten = info[0]; /* In 128-bit registers */
+ bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+ offset0 = info[1];
+ offset1 = info[DIFFERENTIAL_METAINFO_SIZE+1];
+ packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2;
#endif
-#ifdef HAVE_SSE2
-#ifdef DEBUG
- printf("bitpack:\n");
- for (i = 0; i < packsize/2; i++) {
- print_vector_hex(bitpack[i]);
- }
- printf("\n");
-#endif
-
- _diffs = (UINT4 *) &(diffs[0]);
-
- /* Unpack fwd 32 cumulative sums under SIMD */
- (unpacker_all_table[packsize])(&(diffs[0]),bitpack);
- vertical_order_fwd(&(offsets[1]),_diffs);
-
- /* Unpack rev 32 cumulative sums under SIMD */
- (unpacker_all_table[packsize+1])(&(diffs[0]),bitpack);
- vertical_order_rev(&(offsets[33]),_diffs);
#ifdef DEBUG
- printf("%u\n",offsets[i]);
- for (i = 1; i <= BLOCKSIZE; i += 4) {
- printf("%u %u %u %u\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
- }
- printf("end of diffs vertical\n");
+ printf("oligo: %08X, nwritten %u, offset0 %u, offset1 %u, packsize %d\n",
+ oligo,nwritten,offset0,offset1,packsize);
#endif
-#else
-
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
/* Unpack all 64 diffs for non-SIMD */
(unpacker_all_table[packsize])(&(diffs[0]),bitpack);
@@ -14311,7 +15250,35 @@ Bitpack64_block_offsets (UINT4 *offsets, Storedoligomer_T oligo,
printf("end of diffs vertical\n");
#endif
-#endif /* HAVE_SSE2 */
+#else /* littleendian and SSE2 */
+
+#ifdef DEBUG
+ printf("bitpack:\n");
+ for (i = 0; i < packsize/2; i++) {
+ print_vector_hex(bitpack[i]);
+ }
+ printf("\n");
+#endif
+
+ _diffs = (UINT4 *) &(diffs[0]);
+
+ /* Unpack fwd 32 cumulative sums under SIMD */
+ (unpacker_all_table[packsize])(&(diffs[0]),bitpack);
+ vertical_order_fwd(&(offsets[1]),_diffs);
+
+ /* Unpack rev 32 cumulative sums under SIMD */
+ (unpacker_all_table[packsize+1])(&(diffs[0]),bitpack);
+ vertical_order_rev(&(offsets[33]),_diffs);
+
+#ifdef DEBUG
+ printf("%u\n",offsets[i]);
+ for (i = 1; i <= BLOCKSIZE; i += 4) {
+ printf("%u %u %u %u\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
+ }
+ printf("end of diffs vertical\n");
+#endif
+
+#endif /* littleendian and SSE2 */
/* Perform cumulative sum */
offsets[0] = offset0;
@@ -14365,12 +15332,12 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Storedoligomer_T oligo,
Storedoligomer_T bmer;
UINT8 offset0, offset1, temp;
int packsize, k;
-#ifdef HAVE_SSE2
- __m128i diffs[8], *bitpack;
- UINT4 *_diffs;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
int column, row;
UINT4 diffs[BLOCKSIZE], columnar[BLOCKSIZE], *bitpack, *vertical;
+#else
+ __m128i diffs[8], *bitpack;
+ UINT4 *_diffs;
#endif
#ifdef DEBUG
int i;
@@ -14379,13 +15346,33 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Storedoligomer_T oligo,
bmer = oligo/BLOCKSIZE;
info = &(bitpackptrs[bmer * DIFFERENTIAL_METAINFO_SIZE]);
+
+#ifdef WORDS_BIGENDIAN
+ nwritten = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+
+#elif !defined(HAVE_SSE2)
nwritten = info[0]; /* In 128-bit registers */
-#ifdef HAVE_SSE2
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
-#else
bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+
+#else
+ nwritten = info[0]; /* In 128-bit registers */
+ bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
#endif
+#ifdef WORDS_BIGENDIAN
+ offset0 = offset1 = 0UL;
+ pageptr = bitpackpages;
+ while (bmer >= Bigendian_convert_uint(*pageptr)) {
+ offset0 += POSITIONS_PAGE;
+ pageptr++;
+ }
+
+ offset1 = offset0;
+ if (bmer+1 >= Bigendian_convert_uint(*pageptr)) {
+ offset1 += POSITIONS_PAGE;
+ }
+#else
offset0 = offset1 = 0UL;
pageptr = bitpackpages;
while (bmer >= *pageptr) {
@@ -14397,11 +15384,18 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Storedoligomer_T oligo,
if (bmer+1 >= *pageptr) {
offset1 += POSITIONS_PAGE;
}
+#endif
+
+#ifdef WORDS_BIGENDIAN
+ offset0 += Bigendian_convert_uint(info[1]);
+ offset1 += Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE+1]);
+ packsize = (Bigendian_convert_uint(info[DIFFERENTIAL_METAINFO_SIZE]) - nwritten)*2;
+#else
offset0 += info[1];
offset1 += info[DIFFERENTIAL_METAINFO_SIZE+1];
-
packsize = (info[DIFFERENTIAL_METAINFO_SIZE] - nwritten)*2;
+#endif
#ifdef DEBUG
@@ -14409,36 +15403,8 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Storedoligomer_T oligo,
oligo,nwritten,offset0,offset1,packsize);
#endif
-#ifdef HAVE_SSE2
-#ifdef DEBUG
- printf("bitpack:\n");
- for (i = 0; i < packsize/2; i++) {
- print_vector_hex(bitpack[i]);
- }
- printf("\n");
-#endif
-
- _diffs = (UINT4 *) &(diffs[0]);
-
- /* Unpack fwd 32 cumulative sums under SIMD */
- (unpacker_all_table[packsize])(&(diffs[0]),bitpack);
- vertical_order_huge_fwd(&(offsets[1]),_diffs);
-
- /* Unpack rev 32 cumulative sums under SIMD */
- (unpacker_all_table[packsize+1])(&(diffs[0]),bitpack);
- vertical_order_huge_rev(&(offsets[33]),_diffs);
-
-#ifdef DEBUG
- printf("%u\n",offsets[i]);
- for (i = 1; i <= 64; i += 4) {
- printf("%u %u %u %u\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
- }
- printf("end of diffs vertical\n");
-#endif
-
-
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
/* Unpack all 64 diffs for non-SIMD */
(unpacker_all_table[packsize])(&(diffs[0]),bitpack);
@@ -14473,6 +15439,34 @@ Bitpack64_block_offsets_huge (UINT8 *offsets, Storedoligomer_T oligo,
printf("end of diffs vertical\n");
#endif
+
+#else
+#ifdef DEBUG
+ printf("bitpack:\n");
+ for (i = 0; i < packsize/2; i++) {
+ print_vector_hex(bitpack[i]);
+ }
+ printf("\n");
+#endif
+
+ _diffs = (UINT4 *) &(diffs[0]);
+
+ /* Unpack fwd 32 cumulative sums under SIMD */
+ (unpacker_all_table[packsize])(&(diffs[0]),bitpack);
+ vertical_order_huge_fwd(&(offsets[1]),_diffs);
+
+ /* Unpack rev 32 cumulative sums under SIMD */
+ (unpacker_all_table[packsize+1])(&(diffs[0]),bitpack);
+ vertical_order_huge_rev(&(offsets[33]),_diffs);
+
+#ifdef DEBUG
+ printf("%u\n",offsets[i]);
+ for (i = 1; i <= 64; i += 4) {
+ printf("%u %u %u %u\n",offsets[i],offsets[i+1],offsets[i+2],offsets[i+3]);
+ }
+ printf("end of diffs vertical\n");
+#endif
+
#endif /* HAVE_SSE2 */
/* Perform cumulative sum */
diff --git a/src/bitpack64-readtwo.c b/src/bitpack64-readtwo.c
index afc5e72..5e143e9 100644
--- a/src/bitpack64-readtwo.c
+++ b/src/bitpack64-readtwo.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bitpack64-readtwo.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: bitpack64-readtwo.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -8,7 +8,9 @@ static char rcsid[] = "$Id: bitpack64-readtwo.c 153955 2014-11-24 17:54:45Z twu
#include <stdio.h>
#include <stdlib.h>
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+#include "bigendian.h"
+#elif defined(HAVE_SSE2)
#include <emmintrin.h>
#endif
@@ -41,8 +43,9 @@ static char rcsid[] = "$Id: bitpack64-readtwo.c 153955 2014-11-24 17:54:45Z twu
/* #define BRANCH_FREE_ROW_SUM 1 -- Not supported here */
/* #define BRANCH_FREE_QTR_BLOCK 1 */
-#ifdef HAVE_SSE2
#ifdef DEBUG
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
/* For debugging */
static void
print_vector_hex (__m128i x) {
@@ -63,21 +66,6 @@ print_vector (__m128i x) {
#endif
-#if 0
-#ifdef HAVE_SSE2
-#ifdef ALLOW_ODD_PACKSIZES
-static __m128i mask1, mask2, mask3, mask4, mask5, mask6, mask7, mask8,
- mask9, mask10, mask11, mask12, mask13, mask14, mask15, mask16,
- mask17, mask18, mask19, mask20, mask21, mask22, mask23, mask24,
- mask25, mask26, mask27, mask28, mask29, mask30, mask31;
-#else
-static __m128i mask2, mask4, mask6, mask8, mask10, mask12, mask14, mask16,
- mask18, mask20, mask22, mask24, mask26, mask28, mask30;
-#endif
-#endif
-#endif
-
-
#define BLOCKSIZE 64
#if 0
@@ -125,7 +113,19 @@ Bitpack64_read_setup () {
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+static void
+unpack_00 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ int i;
+
+ for (i = 0; i < BLOCKSIZE; i++) {
+ *out++ = 0;
+ }
+
+ return;
+}
+
+#else
static void
unpack_00 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i total = _mm_set1_epi32(0U);
@@ -182,19 +182,6 @@ unpack_00_2_4 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-
-#else
-static void
-unpack_00 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- int i;
-
- for (i = 0; i < BLOCKSIZE; i++) {
- *out++ = 0;
- }
-
- return;
-}
#endif
@@ -273,7 +260,99 @@ unpack_01 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_02 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 2 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 2 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_02 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 2 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 2 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
static void
unpack_02_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -831,56 +910,11 @@ unpack_02_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-#else
-
-static void
-unpack_02 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 2 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 4 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 6 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 8 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 10 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 12 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 14 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 18 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 2 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 2 ) ;
- out++;
- }
-
- return;
-}
#endif
+
#ifdef ALLOW_ODD_PACKSIZES
static void
unpack_03 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
@@ -959,7 +993,50 @@ unpack_03 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_04 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ UINT4 outer, inwordpointer;
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ for (outer = 0; outer < 2 ; outer++) {
+ for (inwordpointer = 0; inwordpointer < 32; inwordpointer += 4) {
+ *(out++) = ( Bigendian_convert_uint(*in) >> inwordpointer ) % (1U << 4 ) ;
+ }
+ in += 4;
+ }
+ }
+
+ return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_04 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ UINT4 outer, inwordpointer;
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ for (outer = 0; outer < 2 ; outer++) {
+ for (inwordpointer = 0; inwordpointer < 32; inwordpointer += 4) {
+ *(out++) = ( (*in) >> inwordpointer ) % (1U << 4 ) ;
+ }
+ in += 4;
+ }
+ }
+
+ return;
+}
+
+#else
static void
unpack_04_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -1514,27 +1591,6 @@ unpack_04_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-#else
-static void
-unpack_04 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- UINT4 outer, inwordpointer;
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- for (outer = 0; outer < 2 ; outer++) {
- for (inwordpointer = 0; inwordpointer < 32; inwordpointer += 4) {
- *(out++) = ( (*in) >> inwordpointer ) % (1U << 4 ) ;
- }
- in += 4;
- }
- }
-
- return;
-}
#endif
@@ -1619,34 +1675,135 @@ unpack_05 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
-static void
-unpack_06_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask6 = _mm_set1_epi32(63U);
-
- OutReg = _mm_and_si128( InReg , mask6);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,6) , mask6);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,12) , mask6);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,18) , mask6);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,24) , mask6);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_06 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,30) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 6 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 6 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 6 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 6 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 6 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 6 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_06 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 6 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 6 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 6 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 6 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 6 ) ;
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 6 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_06_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask6 = _mm_set1_epi32(63U);
+
+ OutReg = _mm_and_si128( InReg , mask6);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,6) , mask6);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,12) , mask6);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,18) , mask6);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,24) , mask6);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,30) ;
+ InReg = _mm_load_si128(++in);
OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 6 - 4), mask6));
/* total = _mm_add_epi32(total, OutReg); */
@@ -2204,56 +2361,6 @@ unpack_06_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-#else
-static void
-unpack_06 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 6 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 12 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 18 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 6 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 6 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 10 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 6 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 6 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 8 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 14 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 6 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 6 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -2342,7 +2449,49 @@ unpack_07 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_08 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ UINT4 outer, inwordpointer;
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ for (outer = 0; outer < 4; outer++) {
+ for (inwordpointer = 0; inwordpointer < 32; inwordpointer += 8) {
+ *(out++) = ( Bigendian_convert_uint(*in) >> inwordpointer ) % (1U << 8 ) ;
+ }
+ in += 4;
+ }
+ }
+
+ return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_08 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ UINT4 outer, inwordpointer;
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ for (outer = 0; outer < 4; outer++) {
+ for (inwordpointer = 0; inwordpointer < 32; inwordpointer += 8) {
+ *(out++) = ( (*in) >> inwordpointer ) % (1U << 8 ) ;
+ }
+ in += 4;
+ }
+ }
+
+ return;
+}
+
+#else
static void
unpack_08_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -2933,28 +3082,6 @@ unpack_08_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-
-#else
-static void
-unpack_08 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- UINT4 outer, inwordpointer;
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- for (outer = 0; outer < 4; outer++) {
- for (inwordpointer = 0; inwordpointer < 32; inwordpointer += 8) {
- *(out++) = ( (*in) >> inwordpointer ) % (1U << 8 ) ;
- }
- in += 4;
- }
- }
-
- return;
-}
#endif
@@ -3047,25 +3174,133 @@ unpack_09 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_10_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask10 = _mm_set1_epi32(1023U);
+unpack_10 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_and_si128( InReg , mask10);
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,10) , mask10);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 10 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 6 ))<<( 10 - 6 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 10 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 10 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 10 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 10 ) ;
+ out++;
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,20) , mask10);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ }
+ return;
+}
- OutReg = _mm_srli_epi32(InReg,30) ;
+#elif !defined(HAVE_SSE2)
+static void
+unpack_10 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 10 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 6 ))<<( 10 - 6 );
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 10 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 10 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 10 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 10 ) ;
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 10 ) ;
+ out++;
+
+ }
+ return;
+}
+
+#else
+static void
+unpack_10_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask10 = _mm_set1_epi32(1023U);
+
+ OutReg = _mm_and_si128( InReg , mask10);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,10) , mask10);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,20) , mask10);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,30) ;
InReg = _mm_load_si128(++in);
OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 10 - 8), mask10));
@@ -3676,60 +3911,6 @@ unpack_10_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-#else
-static void
-unpack_10 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 10 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 10 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 10 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 18 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 10 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 10 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 10 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 10 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 14 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 10 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 10 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 12 ) % (1U << 10 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 10 ) ;
- out++;
-
- }
- return;
-}
#endif
@@ -3823,7 +4004,117 @@ unpack_11 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_12 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 12 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 12 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 12 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 12 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 12 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 12 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 12 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 12 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 12 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 12 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 12 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 12 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 12 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 12 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 12 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 12 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 12 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 12 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 12 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 12 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_12 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 12 ) ;
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 12 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 12 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 12 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 12 ) ;
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 12 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 12 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 12 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 12 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 12 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) % (1U << 12 ) ;
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 12 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 12 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 12 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 12 ) ;
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 12 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 12 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 12 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 12 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 12 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
static void
unpack_12_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -4454,73 +4745,18 @@ unpack_12_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
+#endif
-#else
+
+#ifdef ALLOW_ODD_PACKSIZES
static void
-unpack_12 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
+unpack_13 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask13 = _mm_set1_epi32(8191U);
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 12 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 12 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 12 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 12 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 12 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 12 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 12 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 12 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 12 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 12 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 12 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 12 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 12 ) ;
- out++;
- }
-
- return;
-}
-#endif
-
-
-#ifdef ALLOW_ODD_PACKSIZES
-static void
-unpack_13 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask13 = _mm_set1_epi32(8191U);
-
- OutReg = _mm_and_si128( InReg , mask13);
- _mm_store_si128(out++, OutReg);
+ OutReg = _mm_and_si128( InReg , mask13);
+ _mm_store_si128(out++, OutReg);
OutReg = _mm_and_si128( _mm_srli_epi32(InReg,13) , mask13);
/* total = _mm_add_epi32(total, OutReg); */
@@ -4605,7 +4841,123 @@ unpack_13 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_14 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 14 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 14 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 10 ))<<( 14 - 10 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 14 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 6 ))<<( 14 - 6 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 14 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 14 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 14 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 14 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 14 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 14 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 14 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 14 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 14 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 14 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 14 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_14 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 14 ) ;
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 14 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 10 ))<<( 14 - 10 );
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 14 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 6 ))<<( 14 - 6 );
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 14 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 14 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 14 ) ;
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 14 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 14 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 14 ) ;
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 14 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 14 ) ;
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 14 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 14 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 14 ) ;
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 14 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
static void
unpack_14_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -5263,64 +5615,6 @@ unpack_14_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-#else
-static void
-unpack_14 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 14 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 10 ))<<( 14 - 10 );
- out++;
- *out = ( (*in) >> 10 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 14 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 14 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 16 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 14 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 14 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 14 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 14 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 14 ) ;
- out++;
- *out = ( (*in) >> 18 ) % (1U << 14 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -5421,7 +5715,50 @@ unpack_15 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_16 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ UINT4 outer, inwordpointer;
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ for (outer = 0; outer < 8; outer++) {
+ for(inwordpointer = 0; inwordpointer <32; inwordpointer += 16) {
+ *(out++) = ( Bigendian_convert_uint(*in) >> inwordpointer ) % (1U << 16 ) ;
+ }
+ in += 4;
+ }
+ }
+
+ return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_16 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ UINT4 outer, inwordpointer;
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ for (outer = 0; outer < 8; outer++) {
+ for(inwordpointer = 0; inwordpointer <32; inwordpointer += 16) {
+ *(out++) = ( (*in) >> inwordpointer ) % (1U << 16 ) ;
+ }
+ in += 4;
+ }
+ }
+
+ return;
+}
+
+#else
static void
unpack_16_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
__m128i InReg = _mm_load_si128(in);
@@ -6063,27 +6400,6 @@ unpack_16_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-#else
-static void
-unpack_16 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- UINT4 outer, inwordpointer;
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- for (outer = 0; outer < 8; outer++) {
- for(inwordpointer = 0; inwordpointer <32; inwordpointer += 16) {
- *(out++) = ( (*in) >> inwordpointer ) % (1U << 16 ) ;
- }
- in += 4;
- }
- }
-
- return;
-}
#endif
@@ -6186,15 +6502,140 @@ unpack_17 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
-static void
-unpack_18_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask18 = _mm_set1_epi32(262143U);
- OutReg = _mm_and_si128( InReg , mask18);
- _mm_store_si128(out++, OutReg);
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_18 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 18 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 18 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 18 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 18 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 18 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 6 ))<<( 18 - 6 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 10 ))<<( 18 - 10 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 18 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 14 ))<<( 18 - 14 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 18 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_18 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 18 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 18 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 18 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 18 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 18 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 6 ))<<( 18 - 6 );
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 10 ))<<( 18 - 10 );
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 18 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 18 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 14 ))<<( 18 - 14 );
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 18 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_18_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask18 = _mm_set1_epi32(262143U);
+
+ OutReg = _mm_and_si128( InReg , mask18);
+ _mm_store_si128(out++, OutReg);
OutReg = _mm_srli_epi32(InReg,18) ;
InReg = _mm_load_si128(++in);
@@ -6872,68 +7313,6 @@ unpack_18_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-#else
-static void
-unpack_18 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 18 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 18 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 18 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 18 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 18 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 18 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 18 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 10 ))<<( 18 - 10 );
- out++;
- *out = ( (*in) >> 10 ) % (1U << 18 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 18 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 14 ))<<( 18 - 14 );
- out++;
- *out = ( (*in) >> 14 ) % (1U << 18 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -7039,64 +7418,191 @@ unpack_19 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
-static void
-unpack_20_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask20 = _mm_set1_epi32(1048575U);
-
- OutReg = _mm_and_si128( InReg , mask20);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,20) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask20));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,8) , mask20);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,28) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask20));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,16) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask20));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,4) , mask20);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
- OutReg = _mm_srli_epi32(InReg,24) ;
- InReg = _mm_load_si128(++in);
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_20 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask20));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_srli_epi32(InReg,12) ;
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 20 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 20 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 20 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 20 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 20 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 20 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 20 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 20 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 20 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 20 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 20 ) ;
+ out++;
+ }
- return;
+ return;
}
+#elif !defined(HAVE_SSE2)
static void
-unpack_20_fwd_1_3 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg;
- __m128i total;
- const __m128i mask20 = _mm_set1_epi32(1048575U);
+unpack_20 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 20 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 20 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 20 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 20 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 20 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 20 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 20 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 20 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 20 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 20 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 20 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 20 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_20_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask20 = _mm_set1_epi32(1048575U);
+
+ OutReg = _mm_and_si128( InReg , mask20);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,20) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 8), mask20));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,8) , mask20);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,28) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 16), mask20));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,16) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 4), mask20));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,4) , mask20);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,24) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 20 - 12), mask20));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,12) ;
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ return;
+}
+
+static void
+unpack_20_fwd_1_3 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg;
+ __m128i total;
+ const __m128i mask20 = _mm_set1_epi32(1048575U);
/* 1 */
InReg = _mm_load_si128(in);
@@ -7727,70 +8233,6 @@ unpack_20_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-
-#else
-static void
-unpack_20 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 20 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 20 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 20 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 20 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 20 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 20 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 20 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 20 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 20 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 20 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 20 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 20 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 20 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -7899,49 +8341,181 @@ unpack_21 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_22_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask22 = _mm_set1_epi32(4194303U);
-
- OutReg = _mm_and_si128( InReg , mask22);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,22) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask22));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,12) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask22));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+unpack_22 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,2) , mask22);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_srli_epi32(InReg,24) ;
- InReg = _mm_load_si128(++in);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 22 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 22 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 22 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 22 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 14 ))<<( 22 - 14 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 22 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 22 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 22 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 6 ))<<( 22 - 6 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 22 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 18 ))<<( 22 - 18 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 22 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 22 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 20 ))<<( 22 - 20 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 10 ))<<( 22 - 10 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 22 ) ;
+ out++;
+ }
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask22));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ return;
+}
- OutReg = _mm_srli_epi32(InReg,14) ;
- InReg = _mm_load_si128(++in);
+#elif !defined(HAVE_SSE2)
+static void
+unpack_22 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask22));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_and_si128( _mm_srli_epi32(InReg,4) , mask22);
+ *out = ( (*in) >> 0 ) % (1U << 22 ) ;
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 22 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 22 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 22 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 14 ))<<( 22 - 14 );
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 22 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 22 ) ;
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 22 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 6 ))<<( 22 - 6 );
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 22 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 18 ))<<( 22 - 18 );
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 22 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 22 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 20 ))<<( 22 - 20 );
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 22 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 10 ))<<( 22 - 10 );
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 22 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_22_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask22 = _mm_set1_epi32(4194303U);
+
+ OutReg = _mm_and_si128( InReg , mask22);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,22) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 12), mask22));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,12) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 2), mask22));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,2) , mask22);
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,24) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 14), mask22));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,14) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 22 - 4), mask22));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( _mm_srli_epi32(InReg,4) , mask22);
/* total = _mm_add_epi32(total, OutReg); */
_mm_store_si128(out++, OutReg);
@@ -8610,72 +9184,6 @@ unpack_22_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-#else
-static void
-unpack_22 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 22 ) ;
- out++;
- *out = ( (*in) >> 22 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 22 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 22 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 22 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 14 ))<<( 22 - 14 );
- out++;
- *out = ( (*in) >> 14 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 22 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 22 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 22 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 22 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 22 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 18 ))<<( 22 - 18 );
- out++;
- *out = ( (*in) >> 18 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 22 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 22 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 20 ))<<( 22 - 20 );
- out++;
- *out = ( (*in) >> 20 ) % (1U << 22 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 10 ))<<( 22 - 10 );
- out++;
- *out = ( (*in) >> 10 ) % (1U << 22 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -8787,68 +9295,199 @@ unpack_23 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
-static void
-unpack_24_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask24 = _mm_set1_epi32(16777215U);
-
- OutReg = _mm_and_si128( InReg , mask24);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,24) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask24));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,16) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask24));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,8) ;
- InReg = _mm_load_si128(++in);
-
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_and_si128( InReg , mask24);
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,24) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask24));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
- OutReg = _mm_srli_epi32(InReg,16) ;
- InReg = _mm_load_si128(++in);
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_24 (UINT4* __restrict__ out, const UINT4* __restrict__ in) { /* Big-endian build: scalar unpack of 24-bit fields; writes 64 values to out (16 per column) from 48 packed words at in (12 per column). */
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask24));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) { /* one pass per column; the 4 columns are interleaved word-by-word in the packed input */
+ in = &(bitpack[column]); /* first word of this column; its later words are 4 apart (in += 4) */
- OutReg = _mm_srli_epi32(InReg,8) ;
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 24 ) ; /* low 24 bits of the converted word; Bigendian_convert_uint presumably byte-swaps to host order -- TODO confirm in bigendian.c */
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 24 ) ; /* straddling value, low part: the word's top 8 bits */
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 24 - 16 ); /* high part: low 16 bits of the next word, placed above those 8 bits */
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 24 ) ;
+ in += 4; /* that word is exhausted: the next value starts at bit 0 of the following word */
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 24 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 24 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 24 ) ; /* 16th and last value of this column */
+ out++;
+ }
- return;
+ return;
}
+#elif !defined(HAVE_SSE2)
static void
-unpack_24_fwd_1_3 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg;
- __m128i OutReg, total;
- const __m128i mask24 = _mm_set1_epi32(16777215U);
-
- /* 1 */
+unpack_24 (UINT4* __restrict__ out, const UINT4* __restrict__ in) { /* Non-SSE2 build: scalar unpack of 24-bit fields; writes 64 values to out (16 per column) from 48 packed words at in (12 per column). */
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) { /* one pass per column; the 4 columns are interleaved word-by-word in the packed input */
+ in = &(bitpack[column]); /* first word of this column; its later words are 4 apart (in += 4) */
+
+ *out = ( (*in) >> 0 ) % (1U << 24 ) ; /* % (1U << 24) keeps the low 24 bits */
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 24 ) ; /* straddling value, low part: the word's top 8 bits */
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 ); /* high part: low 16 bits of the next word, placed above those 8 bits */
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 24 ) ;
+ in += 4; /* that word is exhausted: the next value starts at bit 0 of the following word */
+ out++;
+ *out = ( (*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 24 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 24 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) % (1U << 24 ) ;
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 24 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 24 ) ; /* 16th and last value of this column */
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_24_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) { /* SSE2: unpacks 24-bit fields, one per 32-bit lane; stores 8 vectors to out from 6 packed vectors at in. Uses the aligned load/store intrinsics, so both pointers must be 16-byte aligned. */
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask24 = _mm_set1_epi32(16777215U); /* 16777215 = 2^24 - 1: low 24 bits of every lane */
+
+ OutReg = _mm_and_si128( InReg , mask24);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,24) ; /* top 8 bits of each lane begin the next value */
+ InReg = _mm_load_si128(++in); /* advance to the next packed vector */
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask24)); /* low 16 bits of the new vector go above those 8 bits; the mask drops bits beyond 24 */
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,16) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask24));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,8) ; /* the remaining top 24 bits are a whole value: no merge or mask needed */
+ InReg = _mm_load_si128(++in); /* loaded before the store below; OutReg is already complete */
+
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_and_si128( InReg , mask24); /* same 4-value pattern repeats on the next 3 vectors */
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,24) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 16), mask24));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,16) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 24 - 8), mask24));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,8) ; /* 8th and last output vector; no further load */
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ return;
+}
+
+static void
+unpack_24_fwd_1_3 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg;
+ __m128i OutReg, total;
+ const __m128i mask24 = _mm_set1_epi32(16777215U);
+
+ /* 1 */
InReg = _mm_load_si128(in);
total = /* OutReg = */ _mm_and_si128( InReg , mask24);
@@ -9489,72 +10128,6 @@ unpack_24_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-
-#else
-static void
-unpack_24 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 24 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 24 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 24 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 24 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 24 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 24 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 24 ) ;
- out++;
- *out = ( (*in) >> 24 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 24 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 24 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 24 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 24 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -9669,32 +10242,173 @@ unpack_25 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
-static void
-unpack_26_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask26 = _mm_set1_epi32(67108863U);
-
- OutReg = _mm_and_si128( InReg , mask26);
- _mm_store_si128(out++, OutReg);
- OutReg = _mm_srli_epi32(InReg,26) ;
- InReg = _mm_load_si128(++in);
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_26 (UINT4* __restrict__ out, const UINT4* __restrict__ in) { /* Big-endian build: scalar unpack of 26-bit fields; writes 64 values to out (16 per column) from 52 packed words at in (13 per column). */
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask26));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) { /* one pass per column; the 4 columns are interleaved word-by-word in the packed input */
+ in = &(bitpack[column]); /* first word of this column; its later words are 4 apart (in += 4) */
- OutReg = _mm_srli_epi32(InReg,20) ;
- InReg = _mm_load_si128(++in);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 26 ) ; /* low 26 bits of the converted word; Bigendian_convert_uint presumably byte-swaps to host order -- TODO confirm in bigendian.c */
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 26 ) ; /* straddling value, low part: the word's top 6 bits */
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 20 ))<<( 26 - 20 ); /* high part: low 20 bits of the next word, placed above those 6 bits */
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 14 ))<<( 26 - 14 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 26 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 26 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 26 ) ; /* this value lies wholly inside the word (bits 2..27), so no word advance follows */
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 22 ))<<( 26 - 22 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 26 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 10 ))<<( 26 - 10 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 26 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 26 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 24 ))<<( 26 - 24 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 18 ))<<( 26 - 18 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 26 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 6 ))<<( 26 - 6 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 26 ) ; /* 16th and last value of this column */
+ out++;
+ }
+
+ return;
+}
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask26));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+#elif !defined(HAVE_SSE2)
+static void
+unpack_26 (UINT4* __restrict__ out, const UINT4* __restrict__ in) { /* Non-SSE2 build: scalar unpack of 26-bit fields; writes 64 values to out (16 per column) from 52 packed words at in (13 per column). */
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,14) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) { /* one pass per column; the 4 columns are interleaved word-by-word in the packed input */
+ in = &(bitpack[column]); /* first word of this column; its later words are 4 apart (in += 4) */
+
+ *out = ( (*in) >> 0 ) % (1U << 26 ) ; /* % (1U << 26) keeps the low 26 bits */
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 26 ) ; /* straddling value, low part: the word's top 6 bits */
+ in += 4;
+ *out |= ((*in) % (1U<< 20 ))<<( 26 - 20 ); /* high part: low 20 bits of the next word, placed above those 6 bits */
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 14 ))<<( 26 - 14 );
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 26 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 26 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 26 ) ; /* this value lies wholly inside the word (bits 2..27), so no word advance follows */
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 22 ))<<( 26 - 22 );
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 26 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 10 ))<<( 26 - 10 );
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 26 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 26 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 24 ))<<( 26 - 24 );
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 18 ))<<( 26 - 18 );
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 26 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 26 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 6 ))<<( 26 - 6 );
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 26 ) ; /* 16th and last value of this column */
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_26_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask26 = _mm_set1_epi32(67108863U);
+
+ OutReg = _mm_and_si128( InReg , mask26);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,26) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 20), mask26));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,20) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 14), mask26));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,14) ;
+ InReg = _mm_load_si128(++in);
OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 26 - 8), mask26));
/* total = _mm_add_epi32(total, OutReg); */
@@ -10404,76 +11118,6 @@ unpack_26_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-#else
-static void
-unpack_26 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 26 ) ;
- out++;
- *out = ( (*in) >> 26 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 20 ))<<( 26 - 20 );
- out++;
- *out = ( (*in) >> 20 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 14 ))<<( 26 - 14 );
- out++;
- *out = ( (*in) >> 14 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 26 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 26 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 26 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 22 ))<<( 26 - 22 );
- out++;
- *out = ( (*in) >> 22 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 26 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 10 ))<<( 26 - 10 );
- out++;
- *out = ( (*in) >> 10 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 26 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 26 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 24 ))<<( 26 - 24 );
- out++;
- *out = ( (*in) >> 24 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 18 ))<<( 26 - 18 );
- out++;
- *out = ( (*in) >> 18 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 26 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 26 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 26 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 26 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -10591,72 +11235,215 @@ unpack_27 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
-static void
-unpack_28_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask28 = _mm_set1_epi32(268435455U);
-
- OutReg = _mm_and_si128( InReg , mask28);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,28) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask28));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,24) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask28));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,20) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask28));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,16) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask28));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_srli_epi32(InReg,12) ;
- InReg = _mm_load_si128(++in);
-
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask28));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
- OutReg = _mm_srli_epi32(InReg,8) ;
- InReg = _mm_load_si128(++in);
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_28 (UINT4* __restrict__ out, const UINT4* __restrict__ in) { /* Big-endian build: scalar unpack of 28-bit fields; writes 64 values to out (16 per column) from 56 packed words at in (14 per column). */
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask28));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) { /* one pass per column; the 4 columns are interleaved word-by-word in the packed input */
+ in = &(bitpack[column]); /* first word of this column; its later words are 4 apart (in += 4) */
- OutReg = _mm_srli_epi32(InReg,4) ;
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 28 ) ; /* low 28 bits of the converted word; Bigendian_convert_uint presumably byte-swaps to host order -- TODO confirm in bigendian.c */
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 28 ) ; /* straddling value, low part: the word's top 4 bits */
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 24 ))<<( 28 - 24 ); /* high part: low 24 bits of the next word, placed above those 4 bits */
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 20 ))<<( 28 - 20 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 28 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 28 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 28 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 28 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 28 ) ;
+ in += 4; /* that word is exhausted: the next value starts at bit 0 of the following word */
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 28 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 24 ))<<( 28 - 24 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 20 ))<<( 28 - 20 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 28 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 28 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 28 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 28 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 28 ) ; /* 16th and last value of this column */
+ out++;
+ }
- return;
+ return;
}
+#elif !defined(HAVE_SSE2)
static void
-unpack_28_fwd_1_3 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg;
- __m128i OutReg, total;
- const __m128i mask28 = _mm_set1_epi32(268435455U);
+unpack_28 (UINT4* __restrict__ out, const UINT4* __restrict__ in) { /* Non-SSE2 build: scalar unpack of 28-bit fields; writes 64 values to out (16 per column) from 56 packed words at in (14 per column). */
+ unsigned int column;
+ const UINT4 *bitpack = in;
- /* 1 */
+ for (column = 0; column < 4; column++) { /* one pass per column; the 4 columns are interleaved word-by-word in the packed input */
+ in = &(bitpack[column]); /* first word of this column; its later words are 4 apart (in += 4) */
+
+ *out = ( (*in) >> 0 ) % (1U << 28 ) ; /* % (1U << 28) keeps the low 28 bits */
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 28 ) ; /* straddling value, low part: the word's top 4 bits */
+ in += 4;
+ *out |= ((*in) % (1U<< 24 ))<<( 28 - 24 ); /* high part: low 24 bits of the next word, placed above those 4 bits */
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 20 ))<<( 28 - 20 );
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 28 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 28 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 28 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 28 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 28 ) ;
+ in += 4; /* that word is exhausted: the next value starts at bit 0 of the following word */
+ out++;
+ *out = ( (*in) >> 0 ) % (1U << 28 ) ;
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 24 ))<<( 28 - 24 );
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 20 ))<<( 28 - 20 );
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 28 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 28 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 28 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 28 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 28 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 28 ) ; /* 16th and last value of this column */
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_28_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) { /* SSE2: unpacks 28-bit fields, one per 32-bit lane; stores 8 vectors to out from 7 packed vectors at in. Uses the aligned load/store intrinsics, so both pointers must be 16-byte aligned. */
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask28 = _mm_set1_epi32(268435455U); /* 268435455 = 2^28 - 1: low 28 bits of every lane */
+
+ OutReg = _mm_and_si128( InReg , mask28);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,28) ; /* top 4 bits of each lane begin the next value */
+ InReg = _mm_load_si128(++in); /* advance to the next packed vector */
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 24), mask28)); /* low 24 bits of the new vector go above those 4 bits; the mask drops bits beyond 28 */
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,24) ; /* each later step carries 4 more leftover bits: 8, 12, 16, 20, 24 */
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 20), mask28));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,20) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 16), mask28));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,16) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 12), mask28));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,12) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 8), mask28));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,8) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 28 - 4), mask28));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,4) ; /* the remaining top 28 bits are a whole value: 8th and last output vector, no mask needed */
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ return;
+}
+
+static void
+unpack_28_fwd_1_3 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg;
+ __m128i OutReg, total;
+ const __m128i mask28 = _mm_set1_epi32(268435455U);
+
+ /* 1 */
InReg = _mm_load_si128(in);
total = /* OutReg = */ _mm_and_si128( InReg , mask28);
@@ -11328,77 +12115,6 @@ unpack_28_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-#else
-static void
-unpack_28 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 28 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 24 ))<<( 28 - 24 );
- out++;
- *out = ( (*in) >> 24 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 20 ))<<( 28 - 20 );
- out++;
- *out = ( (*in) >> 20 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 28 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 28 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 28 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 28 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 28 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) % (1U << 28 ) ;
- out++;
- *out = ( (*in) >> 28 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 24 ))<<( 28 - 24 );
- out++;
- *out = ( (*in) >> 24 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 20 ))<<( 28 - 20 );
- out++;
- *out = ( (*in) >> 20 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 28 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 28 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 28 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 28 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 28 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 28 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -11519,25 +12235,174 @@ unpack_29 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
-static void
-unpack_30_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i InReg = _mm_load_si128(in);
- __m128i OutReg;
- const __m128i mask30 = _mm_set1_epi32(1073741823U);
- OutReg = _mm_and_si128( InReg , mask30);
- _mm_store_si128(out++, OutReg);
+#ifdef WORDS_BIGENDIAN
+static void
+unpack_30 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_srli_epi32(InReg,30) ;
- InReg = _mm_load_si128(++in);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask30));
- /* total = _mm_add_epi32(total, OutReg); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) % (1U << 30 ) ;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 30 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 28 ))<<( 30 - 28 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 28 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 26 ))<<( 30 - 26 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 26 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 24 ))<<( 30 - 24 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 24 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 22 ))<<( 30 - 22 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 22 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 20 ))<<( 30 - 20 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 20 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 18 ))<<( 30 - 18 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 18 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 16 ))<<( 30 - 16 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 16 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 14 ))<<( 30 - 14 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 14 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 12 ))<<( 30 - 12 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 12 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 10 ))<<( 30 - 10 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 10 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 8 ))<<( 30 - 8 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 8 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 6 ))<<( 30 - 6 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 6 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 4 ))<<( 30 - 4 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 4 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= (Bigendian_convert_uint(*in) % (1U<< 2 ))<<( 30 - 2 );
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 2 ) % (1U << 30 ) ;
+ out++;
+ }
- OutReg = _mm_srli_epi32(InReg,28) ;
- InReg = _mm_load_si128(++in);
+ return;
+}
+
+#elif !defined(HAVE_SSE2)
+static void
+unpack_30 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
+
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
+
+ *out = ( (*in) >> 0 ) % (1U << 30 ) ;
+ out++;
+ *out = ( (*in) >> 30 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 28 ))<<( 30 - 28 );
+ out++;
+ *out = ( (*in) >> 28 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 26 ))<<( 30 - 26 );
+ out++;
+ *out = ( (*in) >> 26 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 24 ))<<( 30 - 24 );
+ out++;
+ *out = ( (*in) >> 24 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 22 ))<<( 30 - 22 );
+ out++;
+ *out = ( (*in) >> 22 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 20 ))<<( 30 - 20 );
+ out++;
+ *out = ( (*in) >> 20 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 18 ))<<( 30 - 18 );
+ out++;
+ *out = ( (*in) >> 18 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 16 ))<<( 30 - 16 );
+ out++;
+ *out = ( (*in) >> 16 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 14 ))<<( 30 - 14 );
+ out++;
+ *out = ( (*in) >> 14 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 12 ))<<( 30 - 12 );
+ out++;
+ *out = ( (*in) >> 12 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 10 ))<<( 30 - 10 );
+ out++;
+ *out = ( (*in) >> 10 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 8 ))<<( 30 - 8 );
+ out++;
+ *out = ( (*in) >> 8 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 6 ))<<( 30 - 6 );
+ out++;
+ *out = ( (*in) >> 6 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 4 ))<<( 30 - 4 );
+ out++;
+ *out = ( (*in) >> 4 ) % (1U << 30 ) ;
+ in += 4;
+ *out |= ((*in) % (1U<< 2 ))<<( 30 - 2 );
+ out++;
+ *out = ( (*in) >> 2 ) % (1U << 30 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_30_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i InReg = _mm_load_si128(in);
+ __m128i OutReg;
+ const __m128i mask30 = _mm_set1_epi32(1073741823U);
+
+ OutReg = _mm_and_si128( InReg , mask30);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,30) ;
+ InReg = _mm_load_si128(++in);
+
+ OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 28), mask30));
+ /* total = _mm_add_epi32(total, OutReg); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_srli_epi32(InReg,28) ;
+ InReg = _mm_load_si128(++in);
OutReg = _mm_or_si128(OutReg, _mm_and_si128(_mm_slli_epi32(InReg, 30 - 26), mask30));
/* total = _mm_add_epi32(total, OutReg); */
@@ -12280,81 +13145,6 @@ unpack_30_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-
-#else
-static void
-unpack_30 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) % (1U << 30 ) ;
- out++;
- *out = ( (*in) >> 30 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 28 ))<<( 30 - 28 );
- out++;
- *out = ( (*in) >> 28 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 26 ))<<( 30 - 26 );
- out++;
- *out = ( (*in) >> 26 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 24 ))<<( 30 - 24 );
- out++;
- *out = ( (*in) >> 24 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 22 ))<<( 30 - 22 );
- out++;
- *out = ( (*in) >> 22 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 20 ))<<( 30 - 20 );
- out++;
- *out = ( (*in) >> 20 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 18 ))<<( 30 - 18 );
- out++;
- *out = ( (*in) >> 18 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 16 ))<<( 30 - 16 );
- out++;
- *out = ( (*in) >> 16 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 14 ))<<( 30 - 14 );
- out++;
- *out = ( (*in) >> 14 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 12 ))<<( 30 - 12 );
- out++;
- *out = ( (*in) >> 12 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 10 ))<<( 30 - 10 );
- out++;
- *out = ( (*in) >> 10 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 8 ))<<( 30 - 8 );
- out++;
- *out = ( (*in) >> 8 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 6 ))<<( 30 - 6 );
- out++;
- *out = ( (*in) >> 6 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 4 ))<<( 30 - 4 );
- out++;
- *out = ( (*in) >> 4 ) % (1U << 30 ) ;
- in += 4;
- *out |= ((*in) % (1U<< 2 ))<<( 30 - 2 );
- out++;
- *out = ( (*in) >> 2 ) % (1U << 30 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -12479,59 +13269,181 @@ unpack_31 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
static void
-unpack_32_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i OutReg;
-
- OutReg = _mm_load_si128(in++);
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
-
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
+unpack_32 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- OutReg = _mm_load_si128(in++);
- /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
- _mm_store_si128(out++, OutReg);
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( Bigendian_convert_uint(*in) >> 0 ) ;
+ out++;
+ }
- return;
+ return;
}
+#elif !defined(HAVE_SSE2)
static void
-unpack_32_fwd_1_3 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
- __m128i total;
+unpack_32 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
+ unsigned int column;
+ const UINT4 *bitpack = in;
- /* 1 */
- total = _mm_load_si128(in);
- _mm_store_si128(out++, total);
+ for (column = 0; column < 4; column++) {
+ in = &(bitpack[column]);
- /* Skip row */
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
out++;
-
- /* 3 */
- in += 2;
- total = _mm_load_si128(in);
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ in += 4;
+ out++;
+ *out = ( (*in) >> 0 ) ;
+ out++;
+ }
+
+ return;
+}
+
+#else
+static void
+unpack_32_fwd (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i OutReg;
+
+ OutReg = _mm_load_si128(in++);
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ OutReg = _mm_load_si128(in++);
+ /* total = _mm_add_epi32(total, _mm_load_si128(in++)); */
+ _mm_store_si128(out++, OutReg);
+
+ return;
+}
+
+static void
+unpack_32_fwd_1_3 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
+ __m128i total;
+
+ /* 1 */
+ total = _mm_load_si128(in);
+ _mm_store_si128(out++, total);
+
+ /* Skip row */
+ out++;
+
+ /* 3 */
+ in += 2;
+ total = _mm_load_si128(in);
_mm_store_si128(out++, total);
return;
@@ -12972,69 +13884,6 @@ unpack_32_rev_8_2 (__m128i* __restrict__ out, const __m128i* __restrict__ in) {
return;
}
-
-
-
-#else
-static void
-unpack_32 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
- unsigned int column;
- const UINT4 *bitpack = in;
-
- for (column = 0; column < 4; column++) {
- in = &(bitpack[column]);
-
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- in += 4;
- out++;
- *out = ( (*in) >> 0 ) ;
- out++;
- }
-
- return;
-}
#endif
@@ -13129,345 +13978,10 @@ unpack_32 (UINT4* __restrict__ out, const UINT4* __restrict__ in) {
#endif
-#ifdef HAVE_SSE2
-static void
-vertical_order_fwd (UINT4 *vertical, UINT4 *columnar) {
-
- vertical[0] = columnar[0]; /* remainder 1 */
- vertical[4] = columnar[1]; /* remainder 5 */
- vertical[8] = columnar[2]; /* remainder 9 */
- vertical[12] = columnar[3]; /* remainder 13 */
- vertical[16] = columnar[4]; /* remainder 17 */
- vertical[20] = columnar[5]; /* remainder 21 */
- vertical[24] = columnar[6]; /* remainder 25 */
- vertical[28] = columnar[7]; /* remainder 29 */
-
- vertical[1] = columnar[8]; /* remainder 2 */
- vertical[5] = columnar[9]; /* remainder 6 */
- vertical[9] = columnar[10]; /* remainder 10 */
- vertical[13] = columnar[11]; /* remainder 14 */
- vertical[17] = columnar[12]; /* remainder 18 */
- vertical[21] = columnar[13]; /* remainder 22 */
- vertical[25] = columnar[14]; /* remainder 26 */
- vertical[29] = columnar[15]; /* remainder 30 */
-
- vertical[2] = columnar[16]; /* remainder 3 */
- vertical[6] = columnar[17]; /* remainder 7 */
- vertical[10] = columnar[18]; /* remainder 11 */
- vertical[14] = columnar[19]; /* remainder 15 */
- vertical[18] = columnar[20]; /* remainder 19 */
- vertical[22] = columnar[21]; /* remainder 23 */
- vertical[26] = columnar[22]; /* remainder 27 */
- vertical[30] = columnar[23]; /* remainder 31 */
-
- vertical[3] = columnar[24]; /* remainder 4 */
- vertical[7] = columnar[25]; /* remainder 8 */
- vertical[11] = columnar[26]; /* remainder 12 */
- vertical[15] = columnar[27]; /* remainder 16 */
- vertical[19] = columnar[28]; /* remainder 20 */
- vertical[23] = columnar[29]; /* remainder 24 */
- vertical[27] = columnar[30]; /* remainder 28 */
- vertical[31] = columnar[31]; /* remainder 32 */
-
- return;
-}
-
-static void
-vertical_order_rev (UINT4 *vertical, UINT4 *columnar) {
-
- vertical[0] = columnar[0]; /* remainder 63 */
- vertical[4] = columnar[1]; /* remainder 59 */
- vertical[8] = columnar[2]; /* remainder 55 */
- vertical[12] = columnar[3]; /* remainder 51 */
- vertical[16] = columnar[4]; /* remainder 47 */
- vertical[20] = columnar[5]; /* remainder 43 */
- vertical[24] = columnar[6]; /* remainder 39 */
- vertical[28] = columnar[7]; /* remainder 35 */
-
- vertical[1] = columnar[8]; /* remainder 62 */
- vertical[5] = columnar[9]; /* remainder 58 */
- vertical[9] = columnar[10]; /* remainder 54 */
- vertical[13] = columnar[11]; /* remainder 50 */
- vertical[17] = columnar[12]; /* remainder 46 */
- vertical[21] = columnar[13]; /* remainder 42 */
- vertical[25] = columnar[14]; /* remainder 38 */
- vertical[29] = columnar[15]; /* remainder 34 */
-
- vertical[2] = columnar[16]; /* remainder 61 */
- vertical[6] = columnar[17]; /* remainder 57 */
- vertical[10] = columnar[18]; /* remainder 53 */
- vertical[14] = columnar[19]; /* remainder 49 */
- vertical[18] = columnar[20]; /* remainder 45 */
- vertical[22] = columnar[21]; /* remainder 41 */
- vertical[26] = columnar[22]; /* remainder 37 */
- vertical[30] = columnar[23]; /* remainder 33 */
-
- vertical[3] = columnar[24]; /* remainder 60 */
- vertical[7] = columnar[25]; /* remainder 56 */
- vertical[11] = columnar[26]; /* remainder 52 */
- vertical[15] = columnar[27]; /* remainder 48 */
- vertical[19] = columnar[28]; /* remainder 44 */
- vertical[23] = columnar[29]; /* remainder 40 */
- vertical[27] = columnar[30]; /* remainder 36 */
- vertical[31] = columnar[31]; /* remainder 32 */
-
- return;
-}
-
-static void
-vertical_order_huge_fwd (UINT8 *vertical, UINT4 *columnar) {
-
- vertical[0] = (UINT8) columnar[0]; /* remainder 1 */
- vertical[4] = (UINT8) columnar[1]; /* remainder 5 */
- vertical[8] = (UINT8) columnar[2]; /* remainder 9 */
- vertical[12] = (UINT8) columnar[3]; /* remainder 13 */
- vertical[16] = (UINT8) columnar[4]; /* remainder 17 */
- vertical[20] = (UINT8) columnar[5]; /* remainder 21 */
- vertical[24] = (UINT8) columnar[6]; /* remainder 25 */
- vertical[28] = (UINT8) columnar[7]; /* remainder 29 */
-
- vertical[1] = (UINT8) columnar[8]; /* remainder 2 */
- vertical[5] = (UINT8) columnar[9]; /* remainder 6 */
- vertical[9] = (UINT8) columnar[10]; /* remainder 10 */
- vertical[13] = (UINT8) columnar[11]; /* remainder 14 */
- vertical[17] = (UINT8) columnar[12]; /* remainder 18 */
- vertical[21] = (UINT8) columnar[13]; /* remainder 22 */
- vertical[25] = (UINT8) columnar[14]; /* remainder 26 */
- vertical[29] = (UINT8) columnar[15]; /* remainder 30 */
-
- vertical[2] = (UINT8) columnar[16]; /* remainder 3 */
- vertical[6] = (UINT8) columnar[17]; /* remainder 7 */
- vertical[10] = (UINT8) columnar[18]; /* remainder 11 */
- vertical[14] = (UINT8) columnar[19]; /* remainder 15 */
- vertical[18] = (UINT8) columnar[20]; /* remainder 19 */
- vertical[22] = (UINT8) columnar[21]; /* remainder 23 */
- vertical[26] = (UINT8) columnar[22]; /* remainder 27 */
- vertical[30] = (UINT8) columnar[23]; /* remainder 31 */
-
- vertical[3] = (UINT8) columnar[24]; /* remainder 4 */
- vertical[7] = (UINT8) columnar[25]; /* remainder 8 */
- vertical[11] = (UINT8) columnar[26]; /* remainder 12 */
- vertical[15] = (UINT8) columnar[27]; /* remainder 16 */
- vertical[19] = (UINT8) columnar[28]; /* remainder 20 */
- vertical[23] = (UINT8) columnar[29]; /* remainder 24 */
- vertical[27] = (UINT8) columnar[30]; /* remainder 28 */
- vertical[31] = (UINT8) columnar[31]; /* remainder 32 */
-
- return;
-}
-
-static void
-vertical_order_huge_rev (UINT8 *vertical, UINT4 *columnar) {
-
- vertical[0] = (UINT8) columnar[0]; /* remainder 63 */
- vertical[4] = (UINT8) columnar[1]; /* remainder 59 */
- vertical[8] = (UINT8) columnar[2]; /* remainder 55 */
- vertical[12] = (UINT8) columnar[3]; /* remainder 51 */
- vertical[16] = (UINT8) columnar[4]; /* remainder 47 */
- vertical[20] = (UINT8) columnar[5]; /* remainder 43 */
- vertical[24] = (UINT8) columnar[6]; /* remainder 39 */
- vertical[28] = (UINT8) columnar[7]; /* remainder 35 */
-
- vertical[1] = (UINT8) columnar[8]; /* remainder 62 */
- vertical[5] = (UINT8) columnar[9]; /* remainder 58 */
- vertical[9] = (UINT8) columnar[10]; /* remainder 54 */
- vertical[13] = (UINT8) columnar[11]; /* remainder 50 */
- vertical[17] = (UINT8) columnar[12]; /* remainder 46 */
- vertical[21] = (UINT8) columnar[13]; /* remainder 42 */
- vertical[25] = (UINT8) columnar[14]; /* remainder 38 */
- vertical[29] = (UINT8) columnar[15]; /* remainder 34 */
-
- vertical[2] = (UINT8) columnar[16]; /* remainder 61 */
- vertical[6] = (UINT8) columnar[17]; /* remainder 57 */
- vertical[10] = (UINT8) columnar[18]; /* remainder 53 */
- vertical[14] = (UINT8) columnar[19]; /* remainder 49 */
- vertical[18] = (UINT8) columnar[20]; /* remainder 45 */
- vertical[22] = (UINT8) columnar[21]; /* remainder 41 */
- vertical[26] = (UINT8) columnar[22]; /* remainder 37 */
- vertical[30] = (UINT8) columnar[23]; /* remainder 33 */
-
- vertical[3] = (UINT8) columnar[24]; /* remainder 60 */
- vertical[7] = (UINT8) columnar[25]; /* remainder 56 */
- vertical[11] = (UINT8) columnar[26]; /* remainder 52 */
- vertical[15] = (UINT8) columnar[27]; /* remainder 48 */
- vertical[19] = (UINT8) columnar[28]; /* remainder 44 */
- vertical[23] = (UINT8) columnar[29]; /* remainder 40 */
- vertical[27] = (UINT8) columnar[30]; /* remainder 36 */
- vertical[31] = (UINT8) columnar[31]; /* remainder 32 */
-
- return;
-}
-
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+typedef void (*Unpacker_T) (UINT4* __restrict__, const UINT4* __restrict__);
#else
-
-#if 0
-static void
-vertical_order (UINT4 *vertical, UINT4 *columnar) {
-
- vertical[0] = columnar[0]; /* remainder 1 */
- vertical[4] = columnar[1]; /* remainder 5 */
- vertical[8] = columnar[2]; /* remainder 9 */
- vertical[12] = columnar[3]; /* remainder 13 */
- vertical[16] = columnar[4]; /* remainder 17 */
- vertical[20] = columnar[5]; /* remainder 21 */
- vertical[24] = columnar[6]; /* remainder 25 */
- vertical[28] = columnar[7]; /* remainder 29 */
-
- vertical[1] = columnar[8]; /* remainder 2 */
- vertical[5] = columnar[9]; /* remainder 6 */
- vertical[9] = columnar[10]; /* remainder 10 */
- vertical[13] = columnar[11]; /* remainder 14 */
- vertical[17] = columnar[12]; /* remainder 18 */
- vertical[21] = columnar[13]; /* remainder 22 */
- vertical[25] = columnar[14]; /* remainder 26 */
- vertical[29] = columnar[15]; /* remainder 30 */
-
- vertical[2] = columnar[16]; /* remainder 3 */
- vertical[6] = columnar[17]; /* remainder 7 */
- vertical[10] = columnar[18]; /* remainder 11 */
- vertical[14] = columnar[19]; /* remainder 15 */
- vertical[18] = columnar[20]; /* remainder 19 */
- vertical[22] = columnar[21]; /* remainder 23 */
- vertical[26] = columnar[22]; /* remainder 27 */
- vertical[30] = columnar[23]; /* remainder 31 */
-
- vertical[3] = columnar[24]; /* remainder 4 */
- vertical[7] = columnar[25]; /* remainder 8 */
- vertical[11] = columnar[26]; /* remainder 12 */
- vertical[15] = columnar[27]; /* remainder 16 */
- vertical[19] = columnar[28]; /* remainder 20 */
- vertical[23] = columnar[29]; /* remainder 24 */
- vertical[27] = columnar[30]; /* remainder 28 */
- vertical[31] = columnar[31]; /* remainder 32 */
-
- vertical[32] = columnar[32]; /* remainder 63 */
- vertical[36] = columnar[33]; /* remainder 59 */
- vertical[40] = columnar[34]; /* remainder 55 */
- vertical[44] = columnar[35]; /* remainder 51 */
- vertical[48] = columnar[36]; /* remainder 47 */
- vertical[52] = columnar[37]; /* remainder 43 */
- vertical[56] = columnar[38]; /* remainder 39 */
- vertical[60] = columnar[39]; /* remainder 35 */
-
- vertical[33] = columnar[40]; /* remainder 62 */
- vertical[37] = columnar[41]; /* remainder 58 */
- vertical[41] = columnar[42]; /* remainder 54 */
- vertical[45] = columnar[43]; /* remainder 50 */
- vertical[49] = columnar[44]; /* remainder 46 */
- vertical[53] = columnar[45]; /* remainder 42 */
- vertical[57] = columnar[46]; /* remainder 38 */
- vertical[61] = columnar[47]; /* remainder 34 */
-
- vertical[34] = columnar[48]; /* remainder 61 */
- vertical[38] = columnar[49]; /* remainder 57 */
- vertical[42] = columnar[50]; /* remainder 53 */
- vertical[46] = columnar[51]; /* remainder 49 */
- vertical[50] = columnar[52]; /* remainder 45 */
- vertical[54] = columnar[53]; /* remainder 41 */
- vertical[58] = columnar[54]; /* remainder 37 */
- vertical[62] = columnar[55]; /* remainder 33 */
-
- vertical[35] = columnar[56]; /* remainder 60 */
- vertical[39] = columnar[57]; /* remainder 56 */
- vertical[43] = columnar[58]; /* remainder 52 */
- vertical[47] = columnar[59]; /* remainder 48 */
- vertical[51] = columnar[60]; /* remainder 44 */
- vertical[55] = columnar[61]; /* remainder 40 */
- vertical[59] = columnar[62]; /* remainder 36 */
- vertical[63] = columnar[63]; /* remainder 32 */
-
- return;
-}
-#endif
-
-#if 0
-static void
-vertical_order_huge (UINT8 *vertical, UINT4 *columnar) {
-
- vertical[0] = (UINT8) columnar[0]; /* remainder 1 */
- vertical[4] = (UINT8) columnar[1]; /* remainder 5 */
- vertical[8] = (UINT8) columnar[2]; /* remainder 9 */
- vertical[12] = (UINT8) columnar[3]; /* remainder 13 */
- vertical[16] = (UINT8) columnar[4]; /* remainder 17 */
- vertical[20] = (UINT8) columnar[5]; /* remainder 21 */
- vertical[24] = (UINT8) columnar[6]; /* remainder 25 */
- vertical[28] = (UINT8) columnar[7]; /* remainder 29 */
-
- vertical[1] = (UINT8) columnar[8]; /* remainder 2 */
- vertical[5] = (UINT8) columnar[9]; /* remainder 6 */
- vertical[9] = (UINT8) columnar[10]; /* remainder 10 */
- vertical[13] = (UINT8) columnar[11]; /* remainder 14 */
- vertical[17] = (UINT8) columnar[12]; /* remainder 18 */
- vertical[21] = (UINT8) columnar[13]; /* remainder 22 */
- vertical[25] = (UINT8) columnar[14]; /* remainder 26 */
- vertical[29] = (UINT8) columnar[15]; /* remainder 30 */
-
- vertical[2] = (UINT8) columnar[16]; /* remainder 3 */
- vertical[6] = (UINT8) columnar[17]; /* remainder 7 */
- vertical[10] = (UINT8) columnar[18]; /* remainder 11 */
- vertical[14] = (UINT8) columnar[19]; /* remainder 15 */
- vertical[18] = (UINT8) columnar[20]; /* remainder 19 */
- vertical[22] = (UINT8) columnar[21]; /* remainder 23 */
- vertical[26] = (UINT8) columnar[22]; /* remainder 27 */
- vertical[30] = (UINT8) columnar[23]; /* remainder 31 */
-
- vertical[3] = (UINT8) columnar[24]; /* remainder 4 */
- vertical[7] = (UINT8) columnar[25]; /* remainder 8 */
- vertical[11] = (UINT8) columnar[26]; /* remainder 12 */
- vertical[15] = (UINT8) columnar[27]; /* remainder 16 */
- vertical[19] = (UINT8) columnar[28]; /* remainder 20 */
- vertical[23] = (UINT8) columnar[29]; /* remainder 24 */
- vertical[27] = (UINT8) columnar[30]; /* remainder 28 */
- vertical[31] = (UINT8) columnar[31]; /* remainder 32 */
-
- vertical[32] = (UINT8) columnar[32]; /* remainder 63 */
- vertical[36] = (UINT8) columnar[33]; /* remainder 59 */
- vertical[40] = (UINT8) columnar[34]; /* remainder 55 */
- vertical[44] = (UINT8) columnar[35]; /* remainder 51 */
- vertical[48] = (UINT8) columnar[36]; /* remainder 47 */
- vertical[52] = (UINT8) columnar[37]; /* remainder 43 */
- vertical[56] = (UINT8) columnar[38]; /* remainder 39 */
- vertical[60] = (UINT8) columnar[39]; /* remainder 35 */
-
- vertical[33] = (UINT8) columnar[40]; /* remainder 62 */
- vertical[37] = (UINT8) columnar[41]; /* remainder 58 */
- vertical[41] = (UINT8) columnar[42]; /* remainder 54 */
- vertical[45] = (UINT8) columnar[43]; /* remainder 50 */
- vertical[49] = (UINT8) columnar[44]; /* remainder 46 */
- vertical[53] = (UINT8) columnar[45]; /* remainder 42 */
- vertical[57] = (UINT8) columnar[46]; /* remainder 38 */
- vertical[61] = (UINT8) columnar[47]; /* remainder 34 */
-
- vertical[34] = (UINT8) columnar[48]; /* remainder 61 */
- vertical[38] = (UINT8) columnar[49]; /* remainder 57 */
- vertical[42] = (UINT8) columnar[50]; /* remainder 53 */
- vertical[46] = (UINT8) columnar[51]; /* remainder 49 */
- vertical[50] = (UINT8) columnar[52]; /* remainder 45 */
- vertical[54] = (UINT8) columnar[53]; /* remainder 41 */
- vertical[58] = (UINT8) columnar[54]; /* remainder 37 */
- vertical[62] = (UINT8) columnar[55]; /* remainder 33 */
-
- vertical[35] = (UINT8) columnar[56]; /* remainder 60 */
- vertical[39] = (UINT8) columnar[57]; /* remainder 56 */
- vertical[43] = (UINT8) columnar[58]; /* remainder 52 */
- vertical[47] = (UINT8) columnar[59]; /* remainder 48 */
- vertical[51] = (UINT8) columnar[60]; /* remainder 44 */
- vertical[55] = (UINT8) columnar[61]; /* remainder 40 */
- vertical[59] = (UINT8) columnar[62]; /* remainder 36 */
- vertical[63] = (UINT8) columnar[63]; /* remainder 32 */
-
- return;
-}
-#endif
-
-#endif
-
-
-
-#ifdef HAVE_SSE2
typedef void (*Unpacker_T) (__m128i* __restrict__, const __m128i* __restrict__);
-#else
-typedef void (*Unpacker_T) (UINT4* __restrict__, const UINT4* __restrict__);
#endif
@@ -13482,8 +13996,20 @@ static Unpacker_T unpacker_table[33] =
unpack_21, unpack_22, unpack_23, unpack_24,
unpack_25, unpack_26, unpack_27, unpack_28,
unpack_29, unpack_30, unpack_31, unpack_32};
+
+#elif defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+static Unpacker_T unpacker_all_table[33] =
+ {unpack_00,
+ unpack_00, unpack_02, unpack_00, unpack_04,
+ unpack_00, unpack_06, unpack_00, unpack_08,
+ unpack_00, unpack_10, unpack_00, unpack_12,
+ unpack_00, unpack_14, unpack_00, unpack_16,
+ unpack_00, unpack_18, unpack_00, unpack_20,
+ unpack_00, unpack_22, unpack_00, unpack_24,
+ unpack_00, unpack_26, unpack_00, unpack_28,
+ unpack_00, unpack_30, unpack_00, unpack_32};
+
#else
-#ifdef HAVE_SSE2
static Unpacker_T unpacker_all_table[34] =
{unpack_00, unpack_00,
unpack_02_fwd, unpack_02_rev, unpack_04_fwd, unpack_04_rev,
@@ -13600,19 +14126,6 @@ static Unpacker_T unpacker_table[17][17] =
unpack_00_0},
};
-
-#else
-static Unpacker_T unpacker_all_table[33] =
- {unpack_00,
- unpack_00, unpack_02, unpack_00, unpack_04,
- unpack_00, unpack_06, unpack_00, unpack_08,
- unpack_00, unpack_10, unpack_00, unpack_12,
- unpack_00, unpack_14, unpack_00, unpack_16,
- unpack_00, unpack_18, unpack_00, unpack_20,
- unpack_00, unpack_22, unpack_00, unpack_24,
- unpack_00, unpack_26, unpack_00, unpack_28,
- unpack_00, unpack_30, unpack_00, unpack_32};
-#endif
#endif
@@ -13629,7 +14142,12 @@ Bitpack64_read_two (UINT4 *end0, Storedoligomer_T oligo, UINT4 *bitpackptrs, UIN
Storedoligomer_T bmer;
UINT4 *info, nwritten, packsize_div2;
int remainder0, remainder1, column;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ UINT4 offset0, offset1;
+ UINT4 ptr;
+ int remainder, row, k, i;
+ UINT4 diffs[BLOCKSIZE+1], *bitpack;
+#else
__m128i diffs[4]; /* Need to provide space for 8 rows (or 2 128-bit registers) for ptr and for end0 */
int delta, row0, row1;
#ifdef BRANCH_FREE_QTR_BLOCK
@@ -13637,12 +14155,6 @@ Bitpack64_read_two (UINT4 *end0, Storedoligomer_T oligo, UINT4 *bitpackptrs, UIN
#endif
__m128i *bitpack;
UINT4 *_diffs;
-
-#else
- UINT4 offset0, offset1;
- UINT4 ptr;
- int remainder, row, k, i;
- UINT4 diffs[BLOCKSIZE+1], *bitpack;
#endif
#ifdef DEBUG
UINT4 offsets[BLOCKSIZE+1];
@@ -13654,119 +14166,52 @@ Bitpack64_read_two (UINT4 *end0, Storedoligomer_T oligo, UINT4 *bitpackptrs, UIN
debug(printf("Entered Bitpack64_read_two with oligo %u => bmer %u\n",oligo,bmer));
+#ifdef WORDS_BIGENDIAN
+ nwritten = Bigendian_convert_uint(info[0]); /* In 128-bit registers */
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ packsize_div2 = (Bigendian_convert_uint(info[METAINFO_SIZE]) - nwritten);
+
+#elif !defined(HAVE_SSE2)
nwritten = info[0]; /* In 128-bit registers */
-#ifdef HAVE_SSE2
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
-#else
bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
-#endif
+ packsize_div2 = (info[METAINFO_SIZE] - nwritten);
+#else
+ nwritten = info[0]; /* In 128-bit registers */
+ bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
/* packsize = (info[METAINFO_SIZE] - nwritten)*2; */
packsize_div2 = (info[METAINFO_SIZE] - nwritten);
+#endif
remainder0 = oligo % BLOCKSIZE;
remainder1 = remainder0 + 1;
+ debug(printf("nwritten %u, packsize %d\n",nwritten,packsize_div2 * 2));
debug(Bitpack64_block_offsets(offsets,oligo,bitpackptrs,bitpackcomp));
-#ifdef HAVE_SSE2
- _diffs = (UINT4 *) diffs; /* Assumes a dummy register in diffs[0] */
-
-#ifdef BRANCH_FREE_QTR_BLOCK
- psums[0] = psums[1] = info[1];
- psums[2] = psums[3] = psums[4] = info[METAINFO_SIZE+1];
-
- delta = 31 - abs(remainder1 - 32);
- column = get_column(delta);
- row = get_row(delta);
- debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block_1,delta,column,row));
-
- (unpacker_table[packsize_div2][column*4 + quarter_block_1])(diffs,bitpack);
- *end0 = psums[quarter_block_1] + _diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4];
-
-
- delta = 31 - abs(remainder0 - 32);
- column = get_column(delta);
- row = get_row(delta);
- debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block_0,delta,column,row));
-
- (unpacker_table[packsize_div2][column*4 + quarter_block_0])(diffs,bitpack);
- return psums[quarter_block_0] + _diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4];
-
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#ifdef WORDS_BIGENDIAN
+ offset0 = Bigendian_convert_uint(info[1]);
+ offset1 = Bigendian_convert_uint(info[METAINFO_SIZE+1]);
#else
-
- if (remainder0 < 16) {
- /* Quarter-block 0 */
- delta = remainder0 - 1;
- column = get_column(delta);
- row0 = get_row(delta);
- row1 = get_row(delta + 1);
- (unpacker_table[packsize_div2][column*4 + 0])(diffs,bitpack);
-
- _diffs = (UINT4 *) &(diffs[2]);
- assign_sum_fwd(*end0,info[1],_diffs,row1);
-
- _diffs = (UINT4 *) &(diffs[0]);
- return_sum_fwd(info[1],_diffs,row0);
-
- } else if (remainder0 < 32) {
- /* Quarter-block 1 */
- delta = remainder0 - 1;
- column = get_column(delta);
- row0 = get_row(delta);
- row1 = get_row(delta + 1);
- (unpacker_table[packsize_div2][column*4 + 1])(diffs,bitpack);
-
- _diffs = (UINT4 *) &(diffs[2]);
- assign_sum_fwd(*end0,info[1],_diffs,row1);
-
- _diffs = (UINT4 *) &(diffs[0]);
- return_sum_fwd(info[1],_diffs,row0);
-
- } else if (remainder0 < 48) {
- /* Quarter-block 2 */
- delta = 63 - remainder1;
- column = get_column(delta);
- row1 = get_row(delta);
- row0 = get_row(delta + 1);
- (unpacker_table[packsize_div2][column*4 + 2])(diffs,bitpack);
-
- _diffs = (UINT4 *) &(diffs[0]);
- assign_sum_rev(*end0,info[METAINFO_SIZE+1],_diffs,row1);
-
- _diffs = (UINT4 *) &(diffs[2]);
- return_sum_rev(info[METAINFO_SIZE+1],_diffs,row0);
-
- } else {
- /* Quarter-block 3 */
- delta = 63 - remainder1;
- column = get_column(delta);
- row1 = get_row(delta);
- row0 = get_row(delta + 1);
- (unpacker_table[packsize_div2][column*4 + 3])(diffs,bitpack);
-
- _diffs = (UINT4 *) &(diffs[0]);
- assign_sum_rev(*end0,info[METAINFO_SIZE+1],_diffs,row1);
-
- _diffs = (UINT4 *) &(diffs[2]);
- return_sum_rev(info[METAINFO_SIZE+1],_diffs,row0);
- }
-
-#endif
-
-#else /* HAVE_SSE2 */
-
offset0 = info[1];
offset1 = info[METAINFO_SIZE+1];
+#endif
/* Unpack all 64 diffs for non-SIMD */
(unpacker_all_table[packsize_div2*2])(&(diffs[1]),bitpack);
#ifdef DEBUG
+#ifdef WORDS_BIGENDIAN
+ printf("oligo: %08X, remainder %d, offset0 %u, offset1 %u\n",
+ oligo,oligo % BLOCKSIZE,Bigendian_convert_uint(info[1]),Bigendian_convert_uint(info[METAINFO_SIZE+1]));
+#else
printf("oligo: %08X, remainder %d, offset0 %u, offset1 %u\n",
oligo,oligo % BLOCKSIZE,info[1],info[METAINFO_SIZE+1]);
+#endif
printf("bitpack:\n");
+
for (i = 1; i <= BLOCKSIZE; i++) {
printf("%d ",diffs[i]);
if (i % (BLOCKSIZE/4) == 0) {
@@ -13912,93 +14357,12 @@ Bitpack64_read_two (UINT4 *end0, Storedoligomer_T oligo, UINT4 *bitpackptrs, UIN
return ptr;
-#endif /* HAVE_SSE2 */
-
-}
-#endif
-
-
-#ifdef LARGE_GENOMES
-/* bitpackpages: A list of b-mers (12-mers by default), ending with -1U */
-UINT8
-Bitpack64_read_two_huge (UINT8 *end0, Storedoligomer_T oligo,
- UINT4 *bitpackpages, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
- Storedoligomer_T bmer;
- UINT4 *info, nwritten;
- UINT8 offset0, offset1;
- UINT4 packsize_div2;
- int remainder0, remainder1, column;
-#ifdef HAVE_SSE2
- int delta, row0, row1;
-#ifdef BRANCH_FREE_ROW_SUM
- __m128i diffs[3];
-#else
- __m128i diffs[4]; /* Need to provide space for 8 rows (or 2 128-bit registers) for ptr and for end0 */
-#endif
-#ifdef BRANCH_FREE_QTR_BLOCK
- UINT8 psums[5]; /* Need 5 to handle case where remainder == 64 */
-#endif
- __m128i *bitpack;
- UINT4 *_diffs;
-
-#else
- UINT4 ptr;
- int remainder, row, k, i;
- UINT4 diffs[BLOCKSIZE+1], *bitpack;
-#endif
- UINT4 *pageptr;
-#ifdef DEBUG
- UINT4 offsets[BLOCKSIZE+1];
-#endif
-
-
- bmer = oligo/BLOCKSIZE;
- info = &(bitpackptrs[bmer * METAINFO_SIZE]);
-
- debug(printf("Entered Bitpack64_read_two_huge with oligo %u => bmer %u\n",oligo,bmer));
-
- nwritten = info[0];
-#ifdef HAVE_SSE2
- bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
-#else
- bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
-#endif
-
- offset0 = (UINT8) info[1];
- offset1 = (UINT8) info[METAINFO_SIZE+1];
- debug(printf("offsets are %llu, %llu\n",offset0,offset1));
-
- if (bitpackpages != NULL) {
- pageptr = bitpackpages;
- debug(printf(" compare bmer %u with pageptr %u\n",bmer,*pageptr));
- while (bmer >= *pageptr) {
- offset0 += POSITIONS_PAGE;
- offset1 += POSITIONS_PAGE;
- pageptr++;
- }
-
- if (bmer + 1 >= *pageptr) {
- offset1 += POSITIONS_PAGE;
- /* pageptr++; */
- }
- }
- debug(printf("offsets are %llu, %llu\n",offset0,offset1));
-
-
- /* packsize = (info[METAINFO_SIZE] - nwritten)*2; */
- packsize_div2 = (info[METAINFO_SIZE] - nwritten);
-
- remainder0 = oligo % BLOCKSIZE;
- remainder1 = remainder0 + 1;
-
- /* debug(Bitpack64_block_offsets_huge(offsets,oligo,bitpackpages,bitpackptrs,bitpackcomp)); */
-
-#ifdef HAVE_SSE2
+#else /* littleendian and SSE2 */
_diffs = (UINT4 *) diffs; /* Assumes a dummy register in diffs[0] */
#ifdef BRANCH_FREE_QTR_BLOCK
- psums[0] = psums[1] = offset0;
- psums[2] = psums[3] = psums[4] = offset1;
+ psums[0] = psums[1] = info[1];
+ psums[2] = psums[3] = psums[4] = info[METAINFO_SIZE+1];
delta = 31 - abs(remainder1 - 32);
column = get_column(delta);
@@ -14006,7 +14370,7 @@ Bitpack64_read_two_huge (UINT8 *end0, Storedoligomer_T oligo,
debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block_1,delta,column,row));
(unpacker_table[packsize_div2][column*4 + quarter_block_1])(diffs,bitpack);
- *end0 = psums[quarter_block_1] + (INT4) (_diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4]);
+ *end0 = psums[quarter_block_1] + _diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4];
delta = 31 - abs(remainder0 - 32);
@@ -14015,7 +14379,7 @@ Bitpack64_read_two_huge (UINT8 *end0, Storedoligomer_T oligo,
debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block_0,delta,column,row));
(unpacker_table[packsize_div2][column*4 + quarter_block_0])(diffs,bitpack);
- return psums[quarter_block_0] + (INT4) (_diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4]);
+ return psums[quarter_block_0] + _diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4];
#else
@@ -14025,14 +14389,13 @@ Bitpack64_read_two_huge (UINT8 *end0, Storedoligomer_T oligo,
column = get_column(delta);
row0 = get_row(delta);
row1 = get_row(delta + 1);
- debug(printf("quarter_block 0, remainder %d, delta %d, column %d, row %d\n",remainder0,delta,column,row0));
(unpacker_table[packsize_div2][column*4 + 0])(diffs,bitpack);
_diffs = (UINT4 *) &(diffs[2]);
- assign_sum_fwd(*end0,offset0,_diffs,row1);
+ assign_sum_fwd(*end0,info[1],_diffs,row1);
_diffs = (UINT4 *) &(diffs[0]);
- return_sum_fwd(offset0,_diffs,row0);
+ return_sum_fwd(info[1],_diffs,row0);
} else if (remainder0 < 32) {
/* Quarter-block 1 */
@@ -14040,14 +14403,13 @@ Bitpack64_read_two_huge (UINT8 *end0, Storedoligomer_T oligo,
column = get_column(delta);
row0 = get_row(delta);
row1 = get_row(delta + 1);
- debug(printf("quarter_block 1, remainder %d, delta %d, column %d, row %d\n",remainder0,delta,column,row0));
(unpacker_table[packsize_div2][column*4 + 1])(diffs,bitpack);
_diffs = (UINT4 *) &(diffs[2]);
- assign_sum_fwd(*end0,offset0,_diffs,row1);
+ assign_sum_fwd(*end0,info[1],_diffs,row1);
_diffs = (UINT4 *) &(diffs[0]);
- return_sum_fwd(offset0,_diffs,row0);
+ return_sum_fwd(info[1],_diffs,row0);
} else if (remainder0 < 48) {
/* Quarter-block 2 */
@@ -14055,14 +14417,13 @@ Bitpack64_read_two_huge (UINT8 *end0, Storedoligomer_T oligo,
column = get_column(delta);
row1 = get_row(delta);
row0 = get_row(delta + 1);
- debug(printf("quarter_block 2, remainder %d, delta %d, column %d, row %d\n",remainder0,delta,column,row0));
(unpacker_table[packsize_div2][column*4 + 2])(diffs,bitpack);
_diffs = (UINT4 *) &(diffs[0]);
- assign_sum_rev(*end0,offset1,_diffs,row1);
+ assign_sum_rev(*end0,info[METAINFO_SIZE+1],_diffs,row1);
_diffs = (UINT4 *) &(diffs[2]);
- return_sum_rev(offset1,_diffs,row0);
+ return_sum_rev(info[METAINFO_SIZE+1],_diffs,row0);
} else {
/* Quarter-block 3 */
@@ -14070,18 +14431,124 @@ Bitpack64_read_two_huge (UINT8 *end0, Storedoligomer_T oligo,
column = get_column(delta);
row1 = get_row(delta);
row0 = get_row(delta + 1);
- debug(printf("quarter_block 3, remainder %d, delta %d, column %d, row %d\n",remainder0,delta,column,row0));
(unpacker_table[packsize_div2][column*4 + 3])(diffs,bitpack);
_diffs = (UINT4 *) &(diffs[0]);
- assign_sum_rev(*end0,offset1,_diffs,row1);
+ assign_sum_rev(*end0,info[METAINFO_SIZE+1],_diffs,row1);
_diffs = (UINT4 *) &(diffs[2]);
- return_sum_rev(offset1,_diffs,row0);
+ return_sum_rev(info[METAINFO_SIZE+1],_diffs,row0);
}
+
+#endif /* BRANCH_FREE_QTR_BLOCK */
+#endif /* HAVE_SSE2 */
+
+}
+#endif
+
+
+#ifdef LARGE_GENOMES
+/* bitpackpages: A list of b-mers (12-mers by default), ending with -1U */
+UINT8
+Bitpack64_read_two_huge (UINT8 *end0, Storedoligomer_T oligo,
+ UINT4 *bitpackpages, UINT4 *bitpackptrs, UINT4 *bitpackcomp) {
+ Storedoligomer_T bmer;
+ UINT4 *info, nwritten;
+ UINT8 offset0, offset1;
+ UINT4 packsize_div2;
+ int remainder0, remainder1, column;
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ UINT4 ptr;
+ int remainder, row, k, i;
+ UINT4 diffs[BLOCKSIZE+1], *bitpack;
+#else
+ int delta, row0, row1;
+#ifdef BRANCH_FREE_ROW_SUM
+ __m128i diffs[3];
+#else
+ __m128i diffs[4]; /* Need to provide space for 8 rows (or 2 128-bit registers) for ptr and for end0 */
+#endif
+#ifdef BRANCH_FREE_QTR_BLOCK
+ UINT8 psums[5]; /* Need 5 to handle case where remainder == 64 */
+#endif
+ __m128i *bitpack;
+ UINT4 *_diffs;
+#endif
+ UINT4 *pageptr;
+#ifdef DEBUG
+ UINT4 offsets[BLOCKSIZE+1];
+#endif
+
+
+ bmer = oligo/BLOCKSIZE;
+ info = &(bitpackptrs[bmer * METAINFO_SIZE]);
+
+ debug(printf("Entered Bitpack64_read_two_huge with oligo %u => bmer %u\n",oligo,bmer));
+
+#ifdef WORDS_BIGENDIAN
+ nwritten = Bigendian_convert_uint(info[0]);
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ offset0 = (UINT8) Bigendian_convert_uint(info[1]);
+ offset1 = (UINT8) Bigendian_convert_uint(info[METAINFO_SIZE+1]);
+
+#elif !defined(HAVE_SSE2)
+ nwritten = info[0];
+ bitpack = (UINT4 *) &(bitpackcomp[nwritten*4]);
+ offset0 = (UINT8) info[1];
+ offset1 = (UINT8) info[METAINFO_SIZE+1];
+
+#else
+ nwritten = info[0];
+ bitpack = (__m128i *) &(bitpackcomp[nwritten*4]);
+ offset0 = (UINT8) info[1];
+ offset1 = (UINT8) info[METAINFO_SIZE+1];
#endif
-#else /* HAVE_SSE2 */
+ debug(printf("offsets are %llu, %llu\n",offset0,offset1));
+
+#ifdef WORDS_BIGENDIAN
+ if (bitpackpages != NULL) {
+ pageptr = bitpackpages;
+ debug(printf(" compare bmer %u with pageptr %u\n",bmer,*pageptr));
+ while (bmer >= Bigendian_convert_uint(*pageptr)) {
+ offset0 += POSITIONS_PAGE;
+ offset1 += POSITIONS_PAGE;
+ pageptr++;
+ }
+
+ if (bmer + 1 >= Bigendian_convert_uint(*pageptr)) {
+ offset1 += POSITIONS_PAGE;
+ /* pageptr++; */
+ }
+ }
+ debug(printf("offsets are %llu, %llu\n",offset0,offset1));
+ packsize_div2 = (Bigendian_convert_uint(info[METAINFO_SIZE]) - nwritten);
+
+#else
+ if (bitpackpages != NULL) {
+ pageptr = bitpackpages;
+ debug(printf(" compare bmer %u with pageptr %u\n",bmer,*pageptr));
+ while (bmer >= *pageptr) {
+ offset0 += POSITIONS_PAGE;
+ offset1 += POSITIONS_PAGE;
+ pageptr++;
+ }
+
+ if (bmer + 1 >= *pageptr) {
+ offset1 += POSITIONS_PAGE;
+ /* pageptr++; */
+ }
+ }
+ debug(printf("offsets are %llu, %llu\n",offset0,offset1));
+ packsize_div2 = (info[METAINFO_SIZE] - nwritten);
+#endif
+
+ remainder0 = oligo % BLOCKSIZE;
+ remainder1 = remainder0 + 1;
+
+ /* debug(Bitpack64_block_offsets_huge(offsets,oligo,bitpackpages,bitpackptrs,bitpackcomp)); */
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
/* Unpack all 64 diffs for non-SIMD */
(unpacker_all_table[packsize_div2*2])(&(diffs[1]),bitpack);
@@ -14237,6 +14704,95 @@ Bitpack64_read_two_huge (UINT8 *end0, Storedoligomer_T oligo,
return ptr;
-#endif /* HAVE_SSE2 */
+
+#else /* littleendian and SSE2 */
+ _diffs = (UINT4 *) diffs; /* Assumes a dummy register in diffs[0] */
+
+#ifdef BRANCH_FREE_QTR_BLOCK
+ psums[0] = psums[1] = offset0;
+ psums[2] = psums[3] = psums[4] = offset1;
+
+ delta = 31 - abs(remainder1 - 32);
+ column = get_column(delta);
+ row = get_row(delta);
+ debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block_1,delta,column,row));
+
+ (unpacker_table[packsize_div2][column*4 + quarter_block_1])(diffs,bitpack);
+ *end0 = psums[quarter_block_1] + (INT4) (_diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4]);
+
+
+ delta = 31 - abs(remainder0 - 32);
+ column = get_column(delta);
+ row = get_row(delta);
+ debug(printf("quarter-block %d, delta %d, column %d, row %d\n",quarter_block_0,delta,column,row));
+
+ (unpacker_table[packsize_div2][column*4 + quarter_block_0])(diffs,bitpack);
+ return psums[quarter_block_0] + (INT4) (_diffs[row+1] + _diffs[row+2] + _diffs[row+3] + _diffs[row+4]);
+
+#else
+
+ if (remainder0 < 16) {
+ /* Quarter-block 0 */
+ delta = remainder0 - 1;
+ column = get_column(delta);
+ row0 = get_row(delta);
+ row1 = get_row(delta + 1);
+ debug(printf("quarter_block 0, remainder %d, delta %d, column %d, row %d\n",remainder0,delta,column,row0));
+ (unpacker_table[packsize_div2][column*4 + 0])(diffs,bitpack);
+
+ _diffs = (UINT4 *) &(diffs[2]);
+ assign_sum_fwd(*end0,offset0,_diffs,row1);
+
+ _diffs = (UINT4 *) &(diffs[0]);
+ return_sum_fwd(offset0,_diffs,row0);
+
+ } else if (remainder0 < 32) {
+ /* Quarter-block 1 */
+ delta = remainder0 - 1;
+ column = get_column(delta);
+ row0 = get_row(delta);
+ row1 = get_row(delta + 1);
+ debug(printf("quarter_block 1, remainder %d, delta %d, column %d, row %d\n",remainder0,delta,column,row0));
+ (unpacker_table[packsize_div2][column*4 + 1])(diffs,bitpack);
+
+ _diffs = (UINT4 *) &(diffs[2]);
+ assign_sum_fwd(*end0,offset0,_diffs,row1);
+
+ _diffs = (UINT4 *) &(diffs[0]);
+ return_sum_fwd(offset0,_diffs,row0);
+
+ } else if (remainder0 < 48) {
+ /* Quarter-block 2 */
+ delta = 63 - remainder1;
+ column = get_column(delta);
+ row1 = get_row(delta);
+ row0 = get_row(delta + 1);
+ debug(printf("quarter_block 2, remainder %d, delta %d, column %d, row %d\n",remainder0,delta,column,row0));
+ (unpacker_table[packsize_div2][column*4 + 2])(diffs,bitpack);
+
+ _diffs = (UINT4 *) &(diffs[0]);
+ assign_sum_rev(*end0,offset1,_diffs,row1);
+
+ _diffs = (UINT4 *) &(diffs[2]);
+ return_sum_rev(offset1,_diffs,row0);
+
+ } else {
+ /* Quarter-block 3 */
+ delta = 63 - remainder1;
+ column = get_column(delta);
+ row1 = get_row(delta);
+ row0 = get_row(delta + 1);
+ debug(printf("quarter_block 3, remainder %d, delta %d, column %d, row %d\n",remainder0,delta,column,row0));
+ (unpacker_table[packsize_div2][column*4 + 3])(diffs,bitpack);
+
+ _diffs = (UINT4 *) &(diffs[0]);
+ assign_sum_rev(*end0,offset1,_diffs,row1);
+
+ _diffs = (UINT4 *) &(diffs[2]);
+ return_sum_rev(offset1,_diffs,row0);
+ }
+
+#endif /* BRANCH_FREE_QTR_BLOCK */
+#endif /* HAVE_SSE2 */
}
#endif
diff --git a/src/bytecoding.c b/src/bytecoding.c
index 5c7bbcf..a311650 100644
--- a/src/bytecoding.c
+++ b/src/bytecoding.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: bytecoding.c 153444 2014-11-18 01:24:55Z twu $";
+static char rcsid[] = "$Id: bytecoding.c 170515 2015-07-23 23:03:24Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -445,6 +445,19 @@ Bytecoding_read (UINT4 key, unsigned char *bytes, UINT4 *exceptions, int nexcept
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
+#ifdef WORDS_BIGENDIAN
+ debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
+ lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
+ highi,exceptions[2*highi],key));
+ if (key < Bigendian_convert_uint(exceptions[2*middlei])) {
+ highi = middlei;
+ } else if (key > Bigendian_convert_uint(exceptions[2*middlei])) {
+ lowi = middlei + 1;
+ } else {
+ debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
+ return Bigendian_convert_uint(exceptions[2*middlei+1]);
+ }
+#else
debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
highi,exceptions[2*highi],key));
@@ -456,6 +469,7 @@ Bytecoding_read (UINT4 key, unsigned char *bytes, UINT4 *exceptions, int nexcept
debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
return exceptions[2*middlei+1];
}
+#endif
}
/* debug10(printf("binary search returns %d => %u\n",highi,exceptions[highi+1])); */
@@ -478,13 +492,31 @@ Bytecoding_read_wguide (UINT4 key, unsigned char *bytes, UINT4 *guide, UINT4 *ex
} else {
guidei = key/guide_interval;
+#ifdef WORDS_BIGENDIAN
+ lowi = Bigendian_convert_uint(guide[guidei]);
+ highi = Bigendian_convert_uint(guide[guidei+1]);
+#else
lowi = guide[guidei];
highi = guide[guidei+1];
+#endif
debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%u\n",lowi,highi,key));
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
+#ifdef WORDS_BIGENDIAN
+ debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
+ lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
+ highi,exceptions[2*highi],key));
+ if (key < Bigendian_convert_uint(exceptions[2*middlei])) {
+ highi = middlei;
+ } else if (key > Bigendian_convert_uint(exceptions[2*middlei])) {
+ lowi = middlei + 1;
+ } else {
+ debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
+ return Bigendian_convert_uint(exceptions[2*middlei+1]);
+ }
+#else
debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
highi,exceptions[2*highi],key));
@@ -496,6 +528,7 @@ Bytecoding_read_wguide (UINT4 key, unsigned char *bytes, UINT4 *guide, UINT4 *ex
debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
return exceptions[2*middlei+1];
}
+#endif
}
/* debug10(printf("binary search returns %d => %u\n",highi,exceptions[highi+1])); */
@@ -525,6 +558,19 @@ Bytecoding_lcpchilddc_lcp (UINT4 key, unsigned char *bytes, UINT4 *exceptions, i
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
+#ifdef WORDS_BIGENDIAN
+ debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
+ lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
+ highi,exceptions[2*highi],key));
+ if (key < Bigendian_convert_uint(exceptions[2*middlei])) {
+ highi = middlei;
+ } else if (key > Bigendian_convert_uint(exceptions[2*middlei])) {
+ lowi = middlei + 1;
+ } else {
+ debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
+ return Bigendian_convert_uint(exceptions[2*middlei+1]);
+ }
+#else
debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
highi,exceptions[2*highi],key));
@@ -536,6 +582,7 @@ Bytecoding_lcpchilddc_lcp (UINT4 key, unsigned char *bytes, UINT4 *exceptions, i
debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
return exceptions[2*middlei+1];
}
+#endif
}
/* debug10(printf("binary search returns %d => %u\n",highi,exceptions[highi+1])); */
@@ -580,13 +627,31 @@ Bytecoding_lcpchilddc_child_up (UINT4 key, unsigned char *bytes, UINT4 *guide, U
} else {
guidei = key/guide_interval;
+#ifdef WORDS_BIGENDIAN
+ lowi = Bigendian_convert_uint(guide[guidei]);
+ highi = Bigendian_convert_uint(guide[guidei+1]);
+#else
lowi = guide[guidei];
highi = guide[guidei+1];
+#endif
debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%u\n",lowi,highi,key));
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
+#ifdef WORDS_BIGENDIAN
+ debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
+ lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
+ highi,exceptions[2*highi],key));
+ if (key < Bigendian_convert_uint(exceptions[2*middlei])) {
+ highi = middlei;
+ } else if (key > Bigendian_convert_uint(exceptions[2*middlei])) {
+ lowi = middlei + 1;
+ } else {
+ debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
+ return key - Bigendian_convert_uint(exceptions[2*middlei+1]);
+ }
+#else
debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
highi,exceptions[2*highi],key));
@@ -598,6 +663,7 @@ Bytecoding_lcpchilddc_child_up (UINT4 key, unsigned char *bytes, UINT4 *guide, U
debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
return key - exceptions[2*middlei+1];
}
+#endif
}
/* debug10(printf("binary search returns %d => %u\n",highi,exceptions[highi+1])); */
@@ -622,13 +688,31 @@ Bytecoding_lcpchilddc_child_next (UINT4 key, unsigned char *bytes, UINT4 *guide,
} else {
guidei = key/guide_interval;
+#ifdef WORDS_BIGENDIAN
+ lowi = Bigendian_convert_uint(guide[guidei]);
+ highi = Bigendian_convert_uint(guide[guidei+1]);
+#else
lowi = guide[guidei];
highi = guide[guidei+1];
+#endif
debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%u\n",lowi,highi,key));
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
+#ifdef WORDS_BIGENDIAN
+ debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
+ lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
+ highi,exceptions[2*highi],key));
+ if (key < Bigendian_convert_uint(exceptions[2*middlei])) {
+ highi = middlei;
+ } else if (key > Bigendian_convert_uint(exceptions[2*middlei])) {
+ lowi = middlei + 1;
+ } else {
+ debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
+ return Bigendian_convert_uint(exceptions[2*middlei+1]) + key + 1;
+ }
+#else
debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
highi,exceptions[2*highi],key));
@@ -640,6 +724,7 @@ Bytecoding_lcpchilddc_child_next (UINT4 key, unsigned char *bytes, UINT4 *guide,
debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
return exceptions[2*middlei+1] + key + 1;
}
+#endif
}
/* debug10(printf("binary search returns %d => %u\n",highi,exceptions[highi+1])); */
@@ -652,7 +737,7 @@ Bytecoding_lcpchilddc_child_next (UINT4 key, unsigned char *bytes, UINT4 *guide,
UINT4
-Bytecoding_lcpchilddc_lcp_next (UINT4 key, unsigned char *bytes, UINT4 *child_guide,
+Bytecoding_lcpchilddc_lcp_next (UINT4 *child_next, UINT4 key, unsigned char *bytes, UINT4 *child_guide,
UINT4 *child_exceptions, int child_guide_interval,
UINT4 *lcp_exceptions, int n_lcp_exceptions) {
UINT8 blocki = key/2; /* Needs to be UINT8, because 5 * 2^32 will overflow UINT4 */
@@ -663,17 +748,37 @@ Bytecoding_lcpchilddc_lcp_next (UINT4 key, unsigned char *bytes, UINT4 *child_gu
if ((byte = block[3 + (key % 2)]) < 255) {
debug10(printf("value %d < 255\n",byte));
- return Bytecoding_lcpchilddc_lcp((UINT4) byte + key + 1,bytes,lcp_exceptions,n_lcp_exceptions);
+ *child_next = (UINT4) byte + key + 1;
+ return Bytecoding_lcpchilddc_lcp(*child_next,bytes,lcp_exceptions,n_lcp_exceptions);
} else {
guidei = key/child_guide_interval;
+#ifdef WORDS_BIGENDIAN
+ lowi = Bigendian_convert_uint(child_guide[guidei]);
+ highi = Bigendian_convert_uint(child_guide[guidei+1]);
+#else
lowi = child_guide[guidei];
highi = child_guide[guidei+1];
+#endif
debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%u\n",lowi,highi,key));
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
+#ifdef WORDS_BIGENDIAN
+ debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
+ lowi,child_exceptions[2*lowi],middlei,child_exceptions[2*middlei],
+ highi,child_exceptions[2*highi],key));
+ if (key < Bigendian_convert_uint(child_exceptions[2*middlei])) {
+ highi = middlei;
+ } else if (key > Bigendian_convert_uint(child_exceptions[2*middlei])) {
+ lowi = middlei + 1;
+ } else {
+ debug10(printf("binary search returns %d => %u\n",middlei,child_exceptions[2*middlei+1]));
+ *child_next = Bigendian_convert_uint(child_exceptions[2*middlei+1]) + key + 1;
+ return Bytecoding_lcpchilddc_lcp(*child_next,bytes,lcp_exceptions,n_lcp_exceptions);
+ }
+#else
debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
lowi,child_exceptions[2*lowi],middlei,child_exceptions[2*middlei],
highi,child_exceptions[2*highi],key));
@@ -683,9 +788,10 @@ Bytecoding_lcpchilddc_lcp_next (UINT4 key, unsigned char *bytes, UINT4 *child_gu
lowi = middlei + 1;
} else {
debug10(printf("binary search returns %d => %u\n",middlei,child_exceptions[2*middlei+1]));
- return Bytecoding_lcpchilddc_lcp(child_exceptions[2*middlei+1] + key + 1,bytes,
- lcp_exceptions,n_lcp_exceptions);
+ *child_next = child_exceptions[2*middlei+1] + key + 1;
+ return Bytecoding_lcpchilddc_lcp(*child_next,bytes,lcp_exceptions,n_lcp_exceptions);
}
+#endif
}
/* debug10(printf("binary search returns %d => %u\n",highi,exceptions[highi+1])); */
@@ -720,13 +826,31 @@ Bytecoding_lcpchilddcn_child_up (bool *nextp, UINT4 key, unsigned char *bytes, U
} else {
guidei = key/guide_interval;
+#ifdef WORDS_BIGENDIAN
+ lowi = Bigendian_convert_uint(guide[guidei]);
+ highi = Bigendian_convert_uint(guide[guidei+1]);
+#else
lowi = guide[guidei];
highi = guide[guidei+1];
+#endif
debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%u\n",lowi,highi,key));
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
+#ifdef WORDS_BIGENDIAN
+ debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
+ lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
+ highi,exceptions[2*highi],key));
+ if (key < Bigendian_convert_uint(exceptions[2*middlei])) {
+ highi = middlei;
+ } else if (key > Bigendian_convert_uint(exceptions[2*middlei])) {
+ lowi = middlei + 1;
+ } else {
+ debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
+ return key - Bigendian_convert_uint(exceptions[2*middlei+1]);
+ }
+#else
debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
highi,exceptions[2*highi],key));
@@ -738,6 +862,7 @@ Bytecoding_lcpchilddcn_child_up (bool *nextp, UINT4 key, unsigned char *bytes, U
debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
return key - exceptions[2*middlei+1];
}
+#endif
}
/* debug10(printf("binary search returns %d => %u\n",highi,exceptions[highi+1])); */
@@ -773,13 +898,31 @@ Bytecoding_lcpchilddcn_child_next (bool *nextp, UINT4 key, unsigned char *bytes,
} else {
guidei = key/guide_interval;
+#ifdef WORDS_BIGENDIAN
+ lowi = Bigendian_convert_uint(guide[guidei]);
+ highi = Bigendian_convert_uint(guide[guidei+1]);
+#else
lowi = guide[guidei];
highi = guide[guidei+1];
+#endif
debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%u\n",lowi,highi,key));
while (lowi < highi) {
middlei = lowi + ((highi - lowi) / 2);
+#ifdef WORDS_BIGENDIAN
+ debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
+ lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
+ highi,exceptions[2*highi],key));
+ if (key < Bigendian_convert_uint(exceptions[2*middlei])) {
+ highi = middlei;
+ } else if (key > Bigendian_convert_uint(exceptions[2*middlei])) {
+ lowi = middlei + 1;
+ } else {
+ debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
+ return Bigendian_convert_uint(exceptions[2*middlei+1]) + key + 1;
+ }
+#else
debug10(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n",
lowi,exceptions[2*lowi],middlei,exceptions[2*middlei],
highi,exceptions[2*highi],key));
@@ -791,6 +934,7 @@ Bytecoding_lcpchilddcn_child_next (bool *nextp, UINT4 key, unsigned char *bytes,
debug10(printf("binary search returns %d => %u\n",middlei,exceptions[2*middlei+1]));
return exceptions[2*middlei+1] + key + 1;
}
+#endif
}
/* debug10(printf("binary search returns %d => %u\n",highi,exceptions[highi+1])); */
diff --git a/src/bytecoding.h b/src/bytecoding.h
index 089e93c..daf21ec 100644
--- a/src/bytecoding.h
+++ b/src/bytecoding.h
@@ -1,4 +1,4 @@
-/* $Id: bytecoding.h 157221 2015-01-22 18:38:57Z twu $ */
+/* $Id: bytecoding.h 170515 2015-07-23 23:03:24Z twu $ */
#ifndef BYTECODING_INCLUDED
#define BYTECODING_INCLUDED
@@ -37,7 +37,8 @@ Bytecoding_lcpchilddc_child_up (UINT4 key, unsigned char *bytes, UINT4 *guide, U
extern UINT4
Bytecoding_lcpchilddc_child_next (UINT4 key, unsigned char *bytes, UINT4 *guide, UINT4 *exceptions, int guide_interval);
extern UINT4
-Bytecoding_lcpchilddc_lcp_next (UINT4 key, unsigned char *lcpchilddc, UINT4 *child_guide,
+Bytecoding_lcpchilddc_lcp_next (UINT4 *child_next, UINT4 key,
+ unsigned char *lcpchilddc, UINT4 *child_guide,
UINT4 *child_exceptions, int child_guide_interval,
UINT4 *lcp_exceptions, int n_lcp_exceptions);
diff --git a/src/compress.c b/src/compress.c
index e6e0b6e..8dc44ad 100644
--- a/src/compress.c
+++ b/src/compress.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: compress.c 157566 2015-01-28 00:02:04Z twu $";
+static char rcsid[] = "$Id: compress.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -31,10 +31,14 @@ static char rcsid[] = "$Id: compress.c 157566 2015-01-28 00:02:04Z twu $";
#include "mem.h" /* For Compress_new */
#include "assert.h"
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+/* Skip */
+#else
#include <emmintrin.h>
#endif
-#ifdef HAVE_SSSE3
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSSE3)
+/* Skip */
+#else
#include <tmmintrin.h>
#endif
#ifdef HAVE_SSE4_1
@@ -78,10 +82,10 @@ static char rcsid[] = "$Id: compress.c 157566 2015-01-28 00:02:04Z twu $";
#endif
-#ifdef HAVE_SSE2
-#define STEP_SIZE 128
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
#define STEP_SIZE 32
+#else
+#define STEP_SIZE 128
#endif
@@ -100,17 +104,17 @@ struct T {
void
Compress_free (T *old) {
if (*old) {
-#ifdef HAVE_SSE2
- _mm_free((*old)->shift_array[0]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
FREE((*old)->shift_array[0]);
+#else
+ _mm_free((*old)->shift_array[0]);
#endif
FREE((*old)->shift_array);
#if 0
-#ifdef HAVE_SSE2
- _mm_free((*old)->blocks);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
FREE((*old)->blocks);
+#else
+ _mm_free((*old)->blocks);
#endif
#endif
FREE(*old);
@@ -171,7 +175,26 @@ write_chars (Genomecomp_T high, Genomecomp_T low, Genomecomp_T flags) {
}
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+void
+Compress_print_blocks (Genomecomp_T *blocks, int nshift, int pos5, int pos3) {
+ int ptr, endptr;
+
+ endptr = (nshift + pos3)/32U*3; /* /STEP_SIZE*COMPRESS_BLOCKSIZE */
+ ptr = (nshift + pos5)/32U*3;
+
+ while (ptr <= endptr) {
+ printf("high: %08X low: %08X flags: %08X\t",
+ blocks[ptr],blocks[ptr+1],blocks[ptr+2]);
+ write_chars(blocks[ptr],blocks[ptr+1],blocks[ptr+2]);
+ printf("\n");
+ ptr += COMPRESS_BLOCKSIZE;
+ }
+ printf("\n");
+ return;
+}
+
+#else
void
Compress_print_blocks (Genomecomp_T *blocks, int nshift, int pos5, int pos3) {
int ptr, endptr;
@@ -279,25 +302,6 @@ Compress_print_one_block (Genomecomp_T *blocks) {
return;
}
-#else
-
-/* Not implemented */
-void
-Compress_print_blocks (Genomecomp_T *blocks, int nshift, int pos5, int pos3) {
- int ptr = 0;
- int nblocks = 0;
-
- while (ptr < nblocks*COMPRESS_BLOCKSIZE) {
- printf("high: %08X low: %08X flags: %08X\t",
- blocks[ptr],blocks[ptr+1],blocks[ptr+2]);
- write_chars(blocks[ptr],blocks[ptr+1],blocks[ptr+2]);
- printf("\n");
- ptr += COMPRESS_BLOCKSIZE;
- }
- printf("\n");
- return;
-}
-
#endif
@@ -316,14 +320,14 @@ Compress_new_fwd (char *gbuffer, Chrpos_T length) {
int c, i;
int in_counter = 0;
-#ifdef HAVE_SSE2
- new->nblocks = (length+127)/128U;
- new->shift_array = (Genomecomp_T **) MALLOC(STEP_SIZE * sizeof(Genomecomp_T *));
- new->shift_array[0] = (Genomecomp_T *) _mm_malloc(STEP_SIZE*(new->nblocks+1)*COMPRESS_BLOCKSIZE * sizeof(Genomecomp_T),16);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
new->nblocks = (length+31)/32U;
new->shift_array = (Genomecomp_T **) MALLOC(STEP_SIZE * sizeof(Genomecomp_T *));
new->shift_array[0] = (Genomecomp_T *) MALLOC(STEP_SIZE*(new->nblocks+1)*COMPRESS_BLOCKSIZE * sizeof(Genomecomp_T));
+#else
+ new->nblocks = (length+127)/128U;
+ new->shift_array = (Genomecomp_T **) MALLOC(STEP_SIZE * sizeof(Genomecomp_T *));
+ new->shift_array[0] = (Genomecomp_T *) _mm_malloc(STEP_SIZE*(new->nblocks+1)*COMPRESS_BLOCKSIZE * sizeof(Genomecomp_T),16);
#endif
#ifdef DEBUG14
new->querylength = length;
@@ -342,7 +346,39 @@ Compress_new_fwd (char *gbuffer, Chrpos_T length) {
position = 0U;
while (position < length) {
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ high = low = flags = 0U;
+ in_counter = 0;
+ while (position < length && in_counter < 32) {
+ c = gbuffer[position++];
+ high >>= 1;
+ low >>= 1;
+ flags >>= 1;
+
+ /* Assume that gbuffer is upper case */
+ switch /*(uppercaseCode[c])*/ (c) {
+ case 'A': /* high |= LEFT_CLEAR; */ /* low |= LEFT_CLEAR; */ /* flags |= LEFT_CLEAR; */ break;
+ case 'C': /* high |= LEFT_CLEAR; */ low |= LEFT_SET; /* flags |= LEFT_CLEAR; */ break;
+ case 'G': high |= LEFT_SET; /* low |= LEFT_CLEAR; */ /* flags |= LEFT_CLEAR; */ break;
+ case 'T': high |= LEFT_SET; low |= LEFT_SET; /* flags |= LEFT_CLEAR; */ break;
+ default: /* high |= LEFT_CLEAR; */ /* low |= LEFT_CLEAR; */ flags |= LEFT_SET;
+ }
+ in_counter++;
+ }
+
+ while (in_counter < 32) {
+ high >>= 1;
+ low >>= 1;
+ flags >>= 1;
+ in_counter++;
+ }
+
+ /* Use old storage method */
+ new->blocks[ptr] = high;
+ new->blocks[ptr+1] = low;
+ new->blocks[ptr+2] = flags;
+
+#else
for (i = 0; i < 4; i++) {
/* Word i */
high = low = flags = 0U;
@@ -375,53 +411,21 @@ Compress_new_fwd (char *gbuffer, Chrpos_T length) {
new->blocks[ptr + i + 4] = low;
new->blocks[ptr + i + 8] = flags;
}
-
-#else
- high = low = flags = 0U;
- in_counter = 0;
- while (position < length && in_counter < 32) {
- c = gbuffer[position++];
- high >>= 1;
- low >>= 1;
- flags >>= 1;
-
- /* Assume that gbuffer is upper case */
- switch /*(uppercaseCode[c])*/ (c) {
- case 'A': /* high |= LEFT_CLEAR; */ /* low |= LEFT_CLEAR; */ /* flags |= LEFT_CLEAR; */ break;
- case 'C': /* high |= LEFT_CLEAR; */ low |= LEFT_SET; /* flags |= LEFT_CLEAR; */ break;
- case 'G': high |= LEFT_SET; /* low |= LEFT_CLEAR; */ /* flags |= LEFT_CLEAR; */ break;
- case 'T': high |= LEFT_SET; low |= LEFT_SET; /* flags |= LEFT_CLEAR; */ break;
- default: /* high |= LEFT_CLEAR; */ /* low |= LEFT_CLEAR; */ flags |= LEFT_SET;
- }
- in_counter++;
- }
-
- while (in_counter < 32) {
- high >>= 1;
- low >>= 1;
- flags >>= 1;
- in_counter++;
- }
-
- /* Use old storage method */
- new->blocks[ptr] = high;
- new->blocks[ptr+1] = low;
- new->blocks[ptr+2] = flags;
#endif
ptr += COMPRESS_BLOCKSIZE;
}
-#ifdef HAVE_SSE2
- /* Compress_shift will access these values */
- new->blocks[ptr] = new->blocks[ptr+1] = new->blocks[ptr+2] = new->blocks[ptr+3] = 0U;
- new->blocks[ptr+4] = new->blocks[ptr+5] = new->blocks[ptr+6] = new->blocks[ptr+7] = 0U;
- new->blocks[ptr+8] = new->blocks[ptr+9] = new->blocks[ptr+10] = new->blocks[ptr+11] = 0U;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
/* Compress_shift will access these values */
new->blocks[ptr] = 0U;
new->blocks[ptr+1] = 0U;
new->blocks[ptr+2] = 0U;
+#else
+ /* Compress_shift will access these values */
+ new->blocks[ptr] = new->blocks[ptr+1] = new->blocks[ptr+2] = new->blocks[ptr+3] = 0U;
+ new->blocks[ptr+4] = new->blocks[ptr+5] = new->blocks[ptr+6] = new->blocks[ptr+7] = 0U;
+ new->blocks[ptr+8] = new->blocks[ptr+9] = new->blocks[ptr+10] = new->blocks[ptr+11] = 0U;
#endif
debug0(printf("Compress_new_fwd\n"));
@@ -446,14 +450,14 @@ Compress_new_rev (char *gbuffer, Chrpos_T length) {
int c, i;
int in_counter = 0;
-#ifdef HAVE_SSE2
- new->nblocks = (length+127)/128U;
- new->shift_array = (Genomecomp_T **) MALLOC(STEP_SIZE * sizeof(Genomecomp_T *));
- new->shift_array[0] = (Genomecomp_T *) _mm_malloc(STEP_SIZE*(new->nblocks+1)*COMPRESS_BLOCKSIZE * sizeof(Genomecomp_T),16);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
new->nblocks = (length+31)/32U;
new->shift_array = (Genomecomp_T **) MALLOC(STEP_SIZE * sizeof(Genomecomp_T *));
new->shift_array[0] = (Genomecomp_T *) MALLOC(STEP_SIZE*(new->nblocks+1)*COMPRESS_BLOCKSIZE * sizeof(Genomecomp_T));
+#else
+ new->nblocks = (length+127)/128U;
+ new->shift_array = (Genomecomp_T **) MALLOC(STEP_SIZE * sizeof(Genomecomp_T *));
+ new->shift_array[0] = (Genomecomp_T *) _mm_malloc(STEP_SIZE*(new->nblocks+1)*COMPRESS_BLOCKSIZE * sizeof(Genomecomp_T),16);
#endif
#ifdef DEBUG14
new->querylength = length;
@@ -472,7 +476,38 @@ Compress_new_rev (char *gbuffer, Chrpos_T length) {
position = length;
while (position > 0) {
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ high = low = flags = 0U;
+ in_counter = 0;
+ while (position > 0 && in_counter < 32) {
+ c = gbuffer[--position];
+ high >>= 1;
+ low >>= 1;
+ flags >>= 1;
+
+ /* Assume that gbuffer is upper case */
+ switch /*(uppercaseCode[c])*/ (c) {
+ case 'T': /* high |= LEFT_CLEAR; */ /* low |= LEFT_CLEAR; */ /* flags |= LEFT_CLEAR; */ break;
+ case 'G': /* high |= LEFT_CLEAR; */ low |= LEFT_SET; /* flags |= LEFT_CLEAR; */ break;
+ case 'C': high |= LEFT_SET; /* low |= LEFT_CLEAR; */ /* flags |= LEFT_CLEAR; */ break;
+ case 'A': high |= LEFT_SET; low |= LEFT_SET; /* flags |= LEFT_CLEAR; */ break;
+ default: /* high |= LEFT_CLEAR; */ /* low |= LEFT_CLEAR; */ flags |= LEFT_SET;
+ }
+ in_counter++;
+ }
+
+ while (in_counter < 32) {
+ high >>= 1;
+ low >>= 1;
+ flags >>= 1;
+ in_counter++;
+ }
+
+ new->blocks[ptr] = high;
+ new->blocks[ptr+1] = low;
+ new->blocks[ptr+2] = flags;
+
+#else
for (i = 0; i < 4; i++) {
/* Word i */
high = low = flags = 0U;
@@ -505,52 +540,21 @@ Compress_new_rev (char *gbuffer, Chrpos_T length) {
new->blocks[ptr + i + 4] = low;
new->blocks[ptr + i + 8] = flags;
}
-
-#else
- high = low = flags = 0U;
- in_counter = 0;
- while (position > 0 && in_counter < 32) {
- c = gbuffer[--position];
- high >>= 1;
- low >>= 1;
- flags >>= 1;
-
- /* Assume that gbuffer is upper case */
- switch /*(uppercaseCode[c])*/ (c) {
- case 'T': /* high |= LEFT_CLEAR; */ /* low |= LEFT_CLEAR; */ /* flags |= LEFT_CLEAR; */ break;
- case 'G': /* high |= LEFT_CLEAR; */ low |= LEFT_SET; /* flags |= LEFT_CLEAR; */ break;
- case 'C': high |= LEFT_SET; /* low |= LEFT_CLEAR; */ /* flags |= LEFT_CLEAR; */ break;
- case 'A': high |= LEFT_SET; low |= LEFT_SET; /* flags |= LEFT_CLEAR; */ break;
- default: /* high |= LEFT_CLEAR; */ /* low |= LEFT_CLEAR; */ flags |= LEFT_SET;
- }
- in_counter++;
- }
-
- while (in_counter < 32) {
- high >>= 1;
- low >>= 1;
- flags >>= 1;
- in_counter++;
- }
-
- new->blocks[ptr] = high;
- new->blocks[ptr+1] = low;
- new->blocks[ptr+2] = flags;
#endif
ptr += COMPRESS_BLOCKSIZE;
}
-#ifdef HAVE_SSE2
- /* Compress_shift will access these values */
- new->blocks[ptr] = new->blocks[ptr+1] = new->blocks[ptr+2] = new->blocks[ptr+3] = 0U;
- new->blocks[ptr+4] = new->blocks[ptr+5] = new->blocks[ptr+6] = new->blocks[ptr+7] = 0U;
- new->blocks[ptr+8] = new->blocks[ptr+9] = new->blocks[ptr+10] = new->blocks[ptr+11] = 0U;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
/* Compress_shift will access these values */
new->blocks[ptr] = 0U;
new->blocks[ptr+1] = 0U;
new->blocks[ptr+2] = 0U;
+#else
+ /* Compress_shift will access these values */
+ new->blocks[ptr] = new->blocks[ptr+1] = new->blocks[ptr+2] = new->blocks[ptr+3] = 0U;
+ new->blocks[ptr+4] = new->blocks[ptr+5] = new->blocks[ptr+6] = new->blocks[ptr+7] = 0U;
+ new->blocks[ptr+8] = new->blocks[ptr+9] = new->blocks[ptr+10] = new->blocks[ptr+11] = 0U;
#endif
debug0(printf("Compress_new_rev\n"));
@@ -1170,7 +1174,7 @@ shift_sse2 (T this, int nshift) {
-#ifndef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
Genomecomp_T *
Compress_shift (T this, int nshift) {
Genomecomp_T *shifted;
@@ -1606,7 +1610,8 @@ Compress32_shift (T this, int nshift) {
Genomecomp_T *shifted;
int rightshift;
int ptr;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
__m128i out, current, next;
#endif
#ifdef DEBUG9
@@ -1642,7 +1647,7 @@ Compress32_shift (T this, int nshift) {
shifted[1] = this->blocks[1] << nshift;
shifted[0] = this->blocks[0] << nshift;
-#elif defined(HAVE_SSE2)
+#elif defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
next = _mm_load_si128((__m128i *) &(this->blocks[ptr]));
while (ptr > 0) {
current = next;
@@ -1710,7 +1715,25 @@ Compress_get_16mer_left (UINT4 *high, UINT4 *low, UINT4 *flags, T this, int pos3
int columni, blocki;
Genomecomp_T *ptr, curr_high, curr_low, curr_flags, prev_high, prev_low, prev_flags;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ /* query is stored as 3 x 32-bit words */
+ blocki = pos3/32U*3;
+
+ ptr = &(this->blocks[blocki]);
+ curr_high = ptr[0];
+ curr_low = ptr[1];
+ curr_flags = ptr[2];
+
+ if (blocki == 0) {
+ prev_high = prev_low = prev_flags = 0U;
+ } else {
+ ptr -= 3;
+ prev_high = ptr[0];
+ prev_low = ptr[1];
+ prev_flags = ptr[2];
+ }
+
+#else
/* query is stored as 3 x 128-bit words */
columni = (pos3 % 128) / 32;
blocki = pos3/128U*12 + columni;
@@ -1733,23 +1756,6 @@ Compress_get_16mer_left (UINT4 *high, UINT4 *low, UINT4 *flags, T this, int pos3
prev_low = ptr[4];
prev_flags = ptr[8];
}
-#else
- /* query is stored as 3 x 32-bit words */
- blocki = pos3/32U*3;
-
- ptr = &(this->blocks[blocki]);
- curr_high = ptr[0];
- curr_low = ptr[1];
- curr_flags = ptr[2];
-
- if (blocki == 0) {
- prev_high = prev_low = prev_flags = 0U;
- } else {
- ptr -= 3;
- prev_high = ptr[0];
- prev_low = ptr[1];
- prev_flags = ptr[2];
- }
#endif
@@ -1784,7 +1790,21 @@ Compress_get_16mer_right (UINT4 *high, UINT4 *low, UINT4 *flags, T this, int pos
int columni, blocki;
Genomecomp_T *ptr, curr_high, curr_low, curr_flags, next_high, next_low, next_flags;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ /* query is stored as 3 x 32-bit words */
+ blocki = pos5/32U*3;
+
+ ptr = &(this->blocks[blocki]);
+ curr_high = ptr[0];
+ curr_low = ptr[1];
+ curr_flags = ptr[2];
+
+ ptr += 3;
+ next_high = ptr[0];
+ next_low = ptr[1];
+ next_flags = ptr[2];
+
+#else
/* query is stored as 3 x 128-bit words */
columni = (pos5 % 128) / 32;
blocki = pos5/128U*12 + columni;
@@ -1805,19 +1825,6 @@ Compress_get_16mer_right (UINT4 *high, UINT4 *low, UINT4 *flags, T this, int pos
next_low = ptr[4];
next_flags = ptr[8];
}
-#else
- /* query is stored as 3 x 32-bit words */
- blocki = pos5/32U*3;
-
- ptr = &(this->blocks[blocki]);
- curr_high = ptr[0];
- curr_low = ptr[1];
- curr_flags = ptr[2];
-
- ptr += 3;
- next_high = ptr[0];
- next_low = ptr[1];
- next_flags = ptr[2];
#endif
debug2(printf("high: %08X %08X\n",curr_high,next_high));
diff --git a/src/compress.h b/src/compress.h
index 41be273..8eb3483 100644
--- a/src/compress.h
+++ b/src/compress.h
@@ -1,4 +1,4 @@
-/* $Id: compress.h 157225 2015-01-22 18:47:23Z twu $ */
+/* $Id: compress.h 168395 2015-06-26 17:13:13Z twu $ */
#ifndef COMPRESS_INCLUDED
#define COMPRESS_INCLUDED
#ifdef HAVE_CONFIG_H
@@ -19,10 +19,10 @@
SIMD in Compress_shift, so COMPRESS_BLOCKSIZE can be 3. */
-#ifdef HAVE_SSE2
-#define COMPRESS_BLOCKSIZE 12 /* 12 unsigned ints per block */
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
#define COMPRESS_BLOCKSIZE 3 /* 3 unsigned ints per block */
+#else
+#define COMPRESS_BLOCKSIZE 12 /* 12 unsigned ints per block */
#endif
diff --git a/src/config.h.in b/src/config.h.in
index ac9a8c4..07cbd67 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -181,6 +181,9 @@
/* Define to 1 if you have the `shmget' function. */
#undef HAVE_SHMGET
+/* Define to 1 if SHM_NORESERVE available for shmget. */
+#undef HAVE_SHM_NORESERVE
+
/* Define to 1 if you have the `sigaction' function. */
#undef HAVE_SIGACTION
diff --git a/src/dynprog_genome.c b/src/dynprog_genome.c
index 3958098..8d2c9dc 100644
--- a/src/dynprog_genome.c
+++ b/src/dynprog_genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_genome.c 145990 2014-08-25 21:47:32Z twu $";
+static char rcsid[] = "$Id: dynprog_genome.c 170390 2015-07-23 01:29:31Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -879,7 +879,7 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
if (watsonp == true) {
if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chroffset + leftoffset + cL;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -888,7 +888,7 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chroffset + rightoffset - cR + 1;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
@@ -898,7 +898,7 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
} else {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chroffset + leftoffset + cL;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -907,7 +907,7 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chroffset + rightoffset - cR + 1;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
@@ -919,7 +919,7 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
} else {
if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chrhigh - leftoffset - cL + 1;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -928,7 +928,7 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chrhigh - rightoffset + cR;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
@@ -938,7 +938,7 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
} else {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chrhigh - leftoffset - cL + 1;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -947,7 +947,7 @@ bridge_intron_gap_8_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chrhigh - rightoffset + cR;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
@@ -1826,7 +1826,7 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
if (watsonp == true) {
if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chroffset + leftoffset + cL;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -1835,7 +1835,7 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chroffset + rightoffset - cR + 1;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
@@ -1845,7 +1845,7 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
} else {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chroffset + leftoffset + cL;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -1854,7 +1854,7 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chroffset + rightoffset - cR + 1;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
@@ -1866,7 +1866,7 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
} else {
if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chrhigh - leftoffset - cL + 1;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -1875,7 +1875,7 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chrhigh - rightoffset + cR;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
@@ -1885,7 +1885,7 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
} else {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chrhigh - leftoffset - cL + 1;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -1894,7 +1894,7 @@ bridge_intron_gap_16_ud (int *finalscore, int *bestrL, int *bestrR, int *bestcL,
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chrhigh - rightoffset + cR;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
@@ -2625,7 +2625,7 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
if (watsonp == true) {
if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chroffset + leftoffset + cL;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -2634,7 +2634,7 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chroffset + rightoffset - cR + 1;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
@@ -2644,7 +2644,7 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
}
} else {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chroffset + leftoffset + cL;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -2653,7 +2653,7 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chroffset + rightoffset - cR + 1;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
@@ -2665,7 +2665,7 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
} else {
if (cdna_direction > 0) {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chrhigh - leftoffset - cL + 1;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -2674,7 +2674,7 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chrhigh - rightoffset + cR;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
@@ -2684,7 +2684,7 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
}
} else {
- for (cL = 0; cL < glengthL; cL++) {
+ for (cL = 0; cL < glengthL - 1; cL++) {
splicesitepos = chrhigh - leftoffset - cL + 1;
if (left_known[cL]) {
left_probabilities[cL] = 1.0;
@@ -2693,7 +2693,7 @@ bridge_intron_gap (int *finalscore, int *bestrL, int *bestrR, int *bestcL, int *
}
}
- for (cR = 0; cR < glengthR; cR++) {
+ for (cR = 0; cR < glengthR - 1; cR++) {
splicesitepos = chrhigh - rightoffset + cR;
if (right_known[cR]) {
right_probabilities[cR] = 1.0;
diff --git a/src/genome-write.c b/src/genome-write.c
index 6fae146..7cadc41 100644
--- a/src/genome-write.c
+++ b/src/genome-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome-write.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: genome-write.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -28,7 +28,7 @@ static char rcsid[] = "$Id: genome-write.c 153955 2014-11-24 17:54:45Z twu $";
#include "compress-write.h"
#include "iit-write.h"
#include "complement.h"
-#include "genome.h" /* For Genome_uncompress_mmap */
+#include "genome.h" /* For Genome_uncompress_memory */
#define CONTROLM 13 /* From PC */
@@ -606,7 +606,7 @@ fill_circular_chromosomes (UINT4 *genomecomp, Univ_IIT_T chromosome_iit, int cir
segment = (char *) CALLOC(seglength+1U,sizeof(char));
/* Add 1U because procedures below are expecting exclusive coordinates */
- Genome_uncompress_mmap(segment,genomecomp,orig_startpos,orig_endpos+1U);
+ Genome_uncompress_memory(segment,genomecomp,orig_startpos,orig_endpos+1U); /* not Genome_uncompress_mmap, which does bigendian conversion */
Compress_update_memory(/*nbadchars*/0,genomecomp,segment,alias_startpos,alias_endpos+1U);
FREE(segment);
}
diff --git a/src/genome.c b/src/genome.c
index fec13f2..ffdd265 100644
--- a/src/genome.c
+++ b/src/genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome.c 161940 2015-03-25 20:36:59Z twu $";
+static char rcsid[] = "$Id: genome.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -9148,6 +9148,125 @@ Genome_uncompress_mmap (char *gbuffer1, Genomecomp_T *blocks, Univcoord_T startp
}
+/* Same as Genome_uncompress_mmap, except does not perform bigendian conversion (block words are used as stored).  Fills gbuffer1 with the characters for positions [startpos, endpos); does not write a terminating NUL. */
+void
+Genome_uncompress_memory (char *gbuffer1, Genomecomp_T *blocks, Univcoord_T startpos,
+ Univcoord_T endpos) {
+ /* Chrpos_T length = endpos - startpos; */
+ Univcoord_T startblock, endblock, ptr;
+ Genomecomp_T high, low, flags;
+ char Buffer[32]; /* one decoded block of 32 characters */
+ int startdiscard, enddiscard;
+ Univcoord_T k = 0, i;
+
+ /* sequence = (char *) CALLOC(length+1,sizeof(char)); */
+
+ ptr = startblock = startpos/32U*3; /* 3 words (high, low, flags) per 32 positions */
+ endblock = endpos/32U*3;
+ startdiscard = startpos % 32; /* offset of startpos within its block */
+ enddiscard = endpos % 32; /* number of positions taken from the block containing endpos */
+
+ if (endblock == startblock) {
+ /* Special case: startpos and endpos fall in the same block */
+#if 0
+ high = Bigendian_convert_uint(blocks[ptr]);
+ low = Bigendian_convert_uint(blocks[ptr+1]);
+ flags = Bigendian_convert_uint(blocks[ptr+2]);
+#else
+ high = blocks[ptr]; low = blocks[ptr+1]; flags = blocks[ptr+2];
+#endif
+
+ memcpy(Buffer,nucleotides[low & 0x0000FFFF],8); /* each 16-bit half-word selects an 8-character entry of nucleotides[] */
+ memcpy(&(Buffer[8]),nucleotides[low >> 16],8);
+ memcpy(&(Buffer[16]),nucleotides[high & 0x0000FFFF],8);
+ memcpy(&(Buffer[24]),nucleotides[high >> 16],8);
+ if (flags) { /* bit i of flags set => position i is overwritten with 'N' */
+ for (i = 0; i < 32; i++) {
+ if (flags & 1U) {
+ Buffer[i] = 'N';
+ }
+ flags >>= 1;
+ }
+ }
+ memcpy(gbuffer1,&(Buffer[startdiscard]),(enddiscard - startdiscard)); /* keep only [startdiscard, enddiscard) */
+
+ } else { /* range spans blocks: partial first block, whole middle blocks, partial last block */
+#if 0
+ high = Bigendian_convert_uint(blocks[ptr]);
+ low = Bigendian_convert_uint(blocks[ptr+1]);
+ flags = Bigendian_convert_uint(blocks[ptr+2]);
+#else
+ high = blocks[ptr]; low = blocks[ptr+1]; flags = blocks[ptr+2];
+#endif
+
+ memcpy(Buffer,nucleotides[low & 0x0000FFFF],8);
+ memcpy(&(Buffer[8]),nucleotides[low >> 16],8);
+ memcpy(&(Buffer[16]),nucleotides[high & 0x0000FFFF],8);
+ memcpy(&(Buffer[24]),nucleotides[high >> 16],8);
+ if (flags) {
+ for (i = 0; i < 32; i++) {
+ if (flags & 1U) {
+ Buffer[i] = 'N';
+ }
+ flags >>= 1;
+ }
+ }
+ memcpy(gbuffer1,&(Buffer[startdiscard]),k = 32 - startdiscard); /* k = number of characters written to gbuffer1 so far */
+ ptr += 3;
+
+ while (ptr < endblock) { /* whole blocks are decoded directly into gbuffer1 */
+#if 0
+ high = Bigendian_convert_uint(blocks[ptr]);
+ low = Bigendian_convert_uint(blocks[ptr+1]);
+ flags = Bigendian_convert_uint(blocks[ptr+2]);
+#else
+ high = blocks[ptr]; low = blocks[ptr+1]; flags = blocks[ptr+2];
+#endif
+
+ memcpy(&(gbuffer1[k]),nucleotides[low & 0x0000FFFF],8); k += 8;
+ memcpy(&(gbuffer1[k]),nucleotides[low >> 16],8); k += 8;
+ memcpy(&(gbuffer1[k]),nucleotides[high & 0x0000FFFF],8); k += 8;
+ memcpy(&(gbuffer1[k]),nucleotides[high >> 16],8); k += 8;
+ if (flags) {
+ for (i = k - 32; i < k; i++) { /* revisit the 32 characters just written */
+ if (flags & 1U) {
+ gbuffer1[i] = 'N';
+ }
+ flags >>= 1;
+ }
+ }
+ ptr += 3;
+ }
+
+ if (enddiscard > 0) { /* last block contributes its first enddiscard characters; skipped when endpos is a multiple of 32 */
+#if 0
+ high = Bigendian_convert_uint(blocks[ptr]);
+ low = Bigendian_convert_uint(blocks[ptr+1]);
+ flags = Bigendian_convert_uint(blocks[ptr+2]);
+#else
+ high = blocks[ptr]; low = blocks[ptr+1]; flags = blocks[ptr+2];
+#endif
+
+ memcpy(Buffer,nucleotides[low & 0x0000FFFF],8);
+ memcpy(&(Buffer[8]),nucleotides[low >> 16],8);
+ memcpy(&(Buffer[16]),nucleotides[high & 0x0000FFFF],8);
+ memcpy(&(Buffer[24]),nucleotides[high >> 16],8);
+ if (flags) {
+ for (i = 0; i < 32; i++) {
+ if (flags & 1U) {
+ Buffer[i] = 'N';
+ }
+ flags >>= 1;
+ }
+ }
+ memcpy(&(gbuffer1[k]),Buffer,enddiscard);
+ }
+ }
+
+ return;
+}
+
+
/* Correct procedure should look at alt high/low and normal flags, and substitute N based on normal flags */
/* May not handle wildcard positions correctly. A wildcard occurs if ref == alt && ref_flag == 0 && alt_flag == 1 */
@@ -10405,6 +10524,9 @@ Genome_setup (T genome_in, T genomealt_in, Mode_T mode_in, int circular_typeint_
} else if (mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
fwd_conversion = "GCGT";
rev_conversion = "ACGC";
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ fwd_conversion = "ACGC";
+ rev_conversion = "GCGT";
}
circular_typeint = circular_typeint_in;
return;
diff --git a/src/genome.h b/src/genome.h
index 41c7430..ec39b70 100644
--- a/src/genome.h
+++ b/src/genome.h
@@ -1,4 +1,4 @@
-/* $Id: genome.h 161940 2015-03-25 20:36:59Z twu $ */
+/* $Id: genome.h 168395 2015-06-26 17:13:13Z twu $ */
#ifndef GENOME_INCLUDED
#define GENOME_INCLUDED
@@ -39,6 +39,9 @@ Genome_user_setup (Genomecomp_T *genome_blocks_in);
extern void
Genome_uncompress_mmap (char *gbuffer1, Genomecomp_T *blocks, Univcoord_T startpos,
Univcoord_T endpos);
+extern void
+Genome_uncompress_memory (char *gbuffer1, Genomecomp_T *blocks, Univcoord_T startpos,
+ Univcoord_T endpos);
extern bool
Genome_fill_buffer (Chrnum_T *chrnum, int *nunknowns, T this, Univcoord_T left, Chrpos_T length, char *gbuffer1,
Univ_IIT_T chromosome_iit);
diff --git a/src/genome128_hr.c b/src/genome128_hr.c
index f79c657..825dd39 100644
--- a/src/genome128_hr.c
+++ b/src/genome128_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome128_hr.c 166739 2015-06-02 01:23:18Z twu $";
+static char rcsid[] = "$Id: genome128_hr.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -24,7 +24,15 @@ static char rcsid[] = "$Id: genome128_hr.c 166739 2015-06-02 01:23:18Z twu $";
#include "compress.h"
#include "popcount.h"
-#ifdef HAVE_SSE2
+#ifdef WORDS_BIGENDIAN
+#include "bigendian.h"
+#else
+#include "littleendian.h"
+#endif
+
+#ifdef WORDS_BIGENDIAN
+/* Do not use SIMD */
+#elif defined(HAVE_SSE2)
#include <emmintrin.h>
#endif
#ifdef HAVE_SSE4_1
@@ -40,13 +48,6 @@ static char rcsid[] = "$Id: genome128_hr.c 166739 2015-06-02 01:23:18Z twu $";
#include <immintrin.h>
#endif
-#ifdef WORDS_BIGENDIAN
-#include "bigendian.h"
-#else
-#include "littleendian.h"
-#endif
-
-
#ifdef DEBUG
#define debug(x) x
#else
@@ -16603,18 +16604,18 @@ Genome_print_blocks (Genomecomp_T *blocks, Univcoord_T startpos, Univcoord_T end
/* sequence = (char *) CALLOC(length+1,sizeof(char)); */
-#ifdef HAVE_SSE2
- startcolumni = (startpos % 128) / 32;
- endcolumni = (endpos % 128) / 32;
-
- startblocki = startpos/128U*12;
- endblocki = endpos/128U*12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startcolumni = (startpos % 128) / 32;
startblocki = startpos/128U*12 + startcolumni;
endcolumni = (endpos % 128) / 32;
endblocki = endpos/128U*12 + endcolumni;
+#else
+ startcolumni = (startpos % 128) / 32;
+ endcolumni = (endpos % 128) / 32;
+
+ startblocki = startpos/128U*12;
+ endblocki = endpos/128U*12;
#endif
startdiscard32 = startpos % 32;
@@ -16623,7 +16624,24 @@ Genome_print_blocks (Genomecomp_T *blocks, Univcoord_T startpos, Univcoord_T end
ptr = &(blocks[startblocki]);
while (ptr <= &(blocks[endblocki])) {
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN)
+ high = Bigendian_convert_uint(ptr[0]);
+ low = Bigendian_convert_uint(ptr[4]);
+ flags = Bigendian_convert_uint(ptr[8]);
+ printf("high: %08X low: %08X flags: %08X\t",high,low,flags);
+ write_chars(high,low,flags);
+ printf("\n");
+
+ ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#elif !defined(HAVE_SSE2)
+ high = ptr[0]; low = ptr[4]; flags = ptr[8];
+ printf("high: %08X low: %08X flags: %08X\t",high,low,flags);
+ write_chars(high,low,flags);
+ printf("\n");
+
+ ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+
+#else
if (startcolumni == 0) {
/* high: 9F61B62A low: 6D68A157 flags: 00000000 */
printf(" \t");
@@ -16747,13 +16765,6 @@ Genome_print_blocks (Genomecomp_T *blocks, Univcoord_T startpos, Univcoord_T end
printf("\n");
ptr += 12;
-#else
- high = ptr[0]; low = ptr[4]; flags = ptr[8];
- printf("high: %08X low: %08X flags: %08X\t",high,low,flags);
- write_chars(high,low,flags);
- printf("\n");
-
- ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
#endif
}
@@ -16773,15 +16784,15 @@ Genome_print_blocks_snp (Genomecomp_T *blocks, Genomecomp_T *snp_blocks, Univcoo
/* sequence = (char *) CALLOC(length+1,sizeof(char)); */
-#ifdef HAVE_SSE2
- startblocki = startpos/128U*12;
- endblocki = endpos/128U*12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startcolumni = (startpos % 128) / 32;
startblocki = startpos/128U*12 + startcolumni;
endcolumni = (endpos % 128) / 32;
endblocki = endpos/128U*12 + endcolumni;
+#else
+ startblocki = startpos/128U*12;
+ endblocki = endpos/128U*12;
#endif
startdiscard32 = startpos % 32;
@@ -16790,7 +16801,11 @@ Genome_print_blocks_snp (Genomecomp_T *blocks, Genomecomp_T *snp_blocks, Univcoo
ref_ptr = &(blocks[startblocki]);
snp_ptr = &(snp_blocks[startblocki]);
while (ref_ptr <= &(blocks[endblocki])) {
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ high = ref_ptr[0]; low = ref_ptr[4]; flags = ref_ptr[8]; snpmask = snp_ptr[8];
+ printf("high: %08X low: %08X flags: %08X snpmask: %08X\n",high,low,flags,snpmask);
+ ref_ptr += 1; snp_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; snp_ptr += 8; startcolumni = 0;}
+#else
high = ref_ptr[0]; low = ref_ptr[4]; flags = ref_ptr[8]; snpmask = snp_ptr[8];
printf("high: %08X low: %08X flags: %08X snpmask: %08X\n",high,low,flags,snpmask);
@@ -16804,11 +16819,6 @@ Genome_print_blocks_snp (Genomecomp_T *blocks, Genomecomp_T *snp_blocks, Univcoo
printf("high: %08X low: %08X flags: %08X snpmask: %08X\n",high,low,flags,snpmask);
ref_ptr += 12; snp_ptr += 12;
-
-#else
- high = ref_ptr[0]; low = ref_ptr[4]; flags = ref_ptr[8]; snpmask = snp_ptr[8];
- printf("high: %08X low: %08X flags: %08X snpmask: %08X\n",high,low,flags,snpmask);
- ref_ptr += 1; snp_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; snp_ptr += 8; startcolumni = 0;}
#endif
}
@@ -16829,12 +16839,12 @@ static Genomecomp_T *snp_blocks;
static bool query_unk_mismatch_p = false;
static bool genome_unk_mismatch_p = true;
-#ifdef HAVE_SSE2
-typedef __m128i Genomediff_T;
-#define STEP_SIZE 128
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
typedef UINT4 Genomediff_T;
#define STEP_SIZE 32
+#else
+typedef __m128i Genomediff_T;
+#define STEP_SIZE 128
#endif
@@ -16844,34 +16854,35 @@ block_diff_standard_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
UINT4 diff;
debug(printf("Comparing high: query %08X with genome %08X ",query_shifted[0],ref_ptr[0]));
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
debug(printf("Comparing low: query %08X with genome %08X ",query_shifted[4],ref_ptr[4]));
#endif
-#ifdef HAVE_SSE2
- diff = (query_shifted[0] ^ ref_ptr[0]) | (query_shifted[4] ^ ref_ptr[4]);
-#elif defined(WORDS_BIGENDIAN)
+#ifdef WORDS_BIGENDIAN
diff = (query_shifted[0] ^ Bigendian_convert_uint(ref_ptr[0])) | (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#else
+#elif !defined(HAVE_SSE2)
diff = (query_shifted[0] ^ ref_ptr[0]) | (query_shifted[1] ^ ref_ptr[4]);
+#else
+ diff = (query_shifted[0] ^ ref_ptr[0]) | (query_shifted[4] ^ ref_ptr[4]);
#endif
/* Query Ns */
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
if (query_unk_mismatch_local_p) {
/* Query: Considering N as a mismatch */
- diff |= query_shifted[8];
+ diff |= query_shifted[2];
} else {
/* Query: Considering N as a wildcard */
- diff &= ~(query_shifted[8]);
+ diff &= ~(query_shifted[2]);
}
#else
if (query_unk_mismatch_local_p) {
/* Query: Considering N as a mismatch */
- diff |= query_shifted[2];
+ diff |= query_shifted[8];
} else {
/* Query: Considering N as a wildcard */
- diff &= ~(query_shifted[2]);
+ diff &= ~(query_shifted[8]);
}
#endif
@@ -16901,33 +16912,7 @@ block_diff_standard_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static Genomediff_T
block_diff_standard (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
-#ifdef HAVE_SSE2
- __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
-
- _query_high = _mm_load_si128((__m128i *) query_shifted);
- _ref_high = _mm_load_si128((__m128i *) ref_ptr);
- _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
- _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
-
- _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
-
- _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
- if (query_unk_mismatch_local_p) {
- _diff = _mm_or_si128(_query_flags, _diff);
- } else {
- _diff = _mm_andnot_si128(_query_flags, _diff);
- }
-
- _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
- if (genome_unk_mismatch_p) {
- _diff = _mm_or_si128(_ref_flags, _diff);
- } else {
- _diff = _mm_andnot_si128(_ref_flags, _diff);
- }
-
- return _diff;
-
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
UINT4 diff;
debug(printf("Comparing high: query %08X with genome %08X ",query_shifted[0],ref_ptr[0]));
@@ -16968,8 +16953,33 @@ block_diff_standard (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf(" => diff %08X\n",diff));
return diff;
-#endif
+#else
+ __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+ _query_high = _mm_load_si128((__m128i *) query_shifted);
+ _ref_high = _mm_load_si128((__m128i *) ref_ptr);
+ _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
+ _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
+
+ _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
+
+ _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
+ if (query_unk_mismatch_local_p) {
+ _diff = _mm_or_si128(_query_flags, _diff);
+ } else {
+ _diff = _mm_andnot_si128(_query_flags, _diff);
+ }
+
+ _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
+ if (genome_unk_mismatch_p) {
+ _diff = _mm_or_si128(_ref_flags, _diff);
+ } else {
+ _diff = _mm_andnot_si128(_ref_flags, _diff);
+ }
+
+ return _diff;
+#endif
}
@@ -16980,28 +16990,30 @@ block_diff_standard_wildcard_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_
UINT4 diff, non_wildcard;
/* Taken from block_diff_standard */
-#ifdef HAVE_SSE2
- diff = (query_shifted[0] ^ ref_ptr[0]) | (query_shifted[4] ^ ref_ptr[4]);
-#else
+#ifdef WORDS_BIGENDIAN
+ diff = (query_shifted[0] ^ Bigendian_convert_uint(ref_ptr[0])) | (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
+#elif !defined(HAVE_SSE2)
diff = (query_shifted[0] ^ ref_ptr[0]) | (query_shifted[1] ^ ref_ptr[4]);
+#else
+ diff = (query_shifted[0] ^ ref_ptr[0]) | (query_shifted[4] ^ ref_ptr[4]);
#endif
/* Query Ns */
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
if (query_unk_mismatch_local_p) {
/* Query: Considering N as a mismatch */
- diff |= query_shifted[8];
+ diff |= query_shifted[2];
} else {
/* Query: Considering N as a wildcard */
- diff &= ~(query_shifted[8]);
+ diff &= ~(query_shifted[2]);
}
#else
if (query_unk_mismatch_local_p) {
/* Query: Considering N as a mismatch */
- diff |= query_shifted[2];
+ diff |= query_shifted[8];
} else {
/* Query: Considering N as a wildcard */
- diff &= ~(query_shifted[2]);
+ diff &= ~(query_shifted[8]);
}
#endif
@@ -17023,10 +17035,12 @@ block_diff_standard_wildcard_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_
}
/* Add difference relative to SNP */
-#ifdef HAVE_SSE2
- diff &= (query_shifted[0] ^ snp_ptr[0]) | (query_shifted[4] ^ snp_ptr[4]);
-#else
+#ifdef WORDS_BIGENDIAN
+ diff &= (query_shifted[0] ^ Bigendian_convert_uint(snp_ptr[0])) | (query_shifted[1] ^ Bigendian_convert_uint(snp_ptr[4]));
+#elif !defined(HAVE_SSE2)
diff &= (query_shifted[0] ^ snp_ptr[0]) | (query_shifted[1] ^ snp_ptr[4]);
+#else
+ diff &= (query_shifted[0] ^ snp_ptr[0]) | (query_shifted[4] ^ snp_ptr[4]);
#endif
/* Test for equality of ref and alt */
@@ -17067,54 +17081,16 @@ block_diff_standard_wildcard_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_
static Genomediff_T
block_diff_standard_wildcard (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
-#ifdef HAVE_SSE2
- __m128i _diff, _wildcard, _query_high, _query_low, _query_flags,
- _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags;
-
- _query_high = _mm_load_si128((__m128i *) query_shifted);
- _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
- _ref_high = _mm_load_si128((__m128i *) ref_ptr);
- _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
-
- _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
-
- _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
- if (query_unk_mismatch_local_p) {
- _diff = _mm_or_si128(_query_flags, _diff);
- } else {
- _diff = _mm_andnot_si128(_query_flags, _diff);
- }
-
- _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
- if (genome_unk_mismatch_p) {
- _diff = _mm_or_si128(_ref_flags, _diff);
- } else {
- _diff = _mm_andnot_si128(_ref_flags, _diff);
- }
- /* End of (query ^ ref) */
-
-
- /* Add (query ^ snp). Don't need to recompute query flags or use SNP flags. */
- _snp_high = _mm_load_si128((__m128i *) snp_ptr);
- _snp_low = _mm_load_si128((__m128i *) &(snp_ptr[4]));
-
- _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low)));
-
-
- /* Test for equality of ref and alt */
- _snp_flags = _mm_load_si128((__m128i *) &(snp_ptr[8]));
- _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags);
- _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard);
-
- _diff = _mm_andnot_si128(_wildcard, _diff);
- return _diff;
-
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
UINT4 diff, non_wildcard;
/* Taken from block_diff_standard */
+#ifdef WORDS_BIGENDIAN
+ diff = (query_shifted[0] ^ Bigendian_convert_uint(ref_ptr[0])) | (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
+#else
diff = (query_shifted[0] ^ ref_ptr[0]) | (query_shifted[1] ^ ref_ptr[4]);
+#endif
/* Query Ns */
if (query_unk_mismatch_local_p) {
@@ -17143,7 +17119,11 @@ block_diff_standard_wildcard (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr
}
/* Add difference relative to SNP */
+#ifdef WORDS_BIGENDIAN
+ diff &= (query_shifted[0] ^ Bigendian_convert_uint(snp_ptr[0])) | (query_shifted[1] ^ Bigendian_convert_uint(snp_ptr[4]));
+#else
diff &= (query_shifted[0] ^ snp_ptr[0]) | (query_shifted[1] ^ snp_ptr[4]);
+#endif
/* Test for equality of ref and alt */
debug(printf("Equality high: ref genome %08X with alt genome %08X ",ref_ptr[0],snp_ptr[0]));
@@ -17173,6 +17153,49 @@ block_diff_standard_wildcard (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr
debug(printf(" => non_wildcard %08X\n",non_wildcard));
return diff & non_wildcard;
+
+#else
+ __m128i _diff, _wildcard, _query_high, _query_low, _query_flags,
+ _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags;
+
+ _query_high = _mm_load_si128((__m128i *) query_shifted);
+ _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
+ _ref_high = _mm_load_si128((__m128i *) ref_ptr);
+ _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
+
+ _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
+
+ _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
+ if (query_unk_mismatch_local_p) {
+ _diff = _mm_or_si128(_query_flags, _diff);
+ } else {
+ _diff = _mm_andnot_si128(_query_flags, _diff);
+ }
+
+ _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
+ if (genome_unk_mismatch_p) {
+ _diff = _mm_or_si128(_ref_flags, _diff);
+ } else {
+ _diff = _mm_andnot_si128(_ref_flags, _diff);
+ }
+ /* End of (query ^ ref) */
+
+
+ /* Add (query ^ snp). Don't need to recompute query flags or use SNP flags. */
+ _snp_high = _mm_load_si128((__m128i *) snp_ptr);
+ _snp_low = _mm_load_si128((__m128i *) &(snp_ptr[4]));
+
+ _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low)));
+
+
+ /* Test for equality of ref and alt */
+ _snp_flags = _mm_load_si128((__m128i *) &(snp_ptr[8]));
+ _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags);
+ _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard);
+
+ _diff = _mm_andnot_si128(_wildcard, _diff);
+
+ return _diff;
#endif
}
@@ -17192,31 +17215,31 @@ block_diff_metct_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
diff = 0U;
} else {
/* Mark genome-T to query-C mismatches */
-#ifdef HAVE_SSE2
- diff = (~(query_shifted[0]) & query_shifted[4]) & (ref_ptr[0] & ref_ptr[4]);
-#elif defined(WORDS_BIGENDIAN)
+#ifdef WORDS_BIGENDIAN
diff = (~(query_shifted[0]) & query_shifted[1]) &
(Bigendian_convert_uint(ref_ptr[0]) & Bigendian_convert_uint(ref_ptr[4]));
-#else
+#elif !defined(HAVE_SSE2)
diff = (~(query_shifted[0]) & query_shifted[1]) & (ref_ptr[0] & ref_ptr[4]);
+#else
+ diff = (~(query_shifted[0]) & query_shifted[4]) & (ref_ptr[0] & ref_ptr[4]);
#endif
debug(printf(" => diff %08X\n",diff));
}
/* Compare reduced C->T nts */
-#ifdef HAVE_SSE2
- diff |= ((query_shifted[0] | query_shifted[4]) ^ (ref_ptr[0] | ref_ptr[4])) | (query_shifted[4] ^ ref_ptr[4]);
-#elif defined(WORDS_BIGENDIAN)
+#ifdef WORDS_BIGENDIAN
diff |= ((query_shifted[0] | query_shifted[1]) ^ (Bigendian_convert_uint(ref_ptr[0]) | Bigendian_convert_uint(ref_ptr[4]))) |
(query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#else
+#elif !defined(HAVE_SSE2)
diff |= ((query_shifted[0] | query_shifted[1]) ^ (ref_ptr[0] | ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]);
+#else
+ diff |= ((query_shifted[0] | query_shifted[4]) ^ (ref_ptr[0] | ref_ptr[4])) | (query_shifted[4] ^ ref_ptr[4]);
#endif
debug(printf(" => diff %08X\n",diff));
/* Flags: Considering N as a mismatch */
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
if (query_unk_mismatch_local_p) {
debug(printf("Marking query flags: query %08X ",query_shifted[8]));
diff |= query_shifted[8];
@@ -17260,43 +17283,8 @@ block_diff_metct_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static Genomediff_T
block_diff_metct (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
bool query_unk_mismatch_local_p, bool sarrayp) {
-#ifdef HAVE_SSE2
- __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
- _query_high = _mm_load_si128((__m128i *) query_shifted);
- _ref_high = _mm_load_si128((__m128i *) ref_ptr);
- _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
- _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
-
- if (sarrayp == true) {
- /* Ignore genome-T to query-C mismatches. Convert everything to 3-nucleotide space */
- _diff = _mm_setzero_si128();
- } else {
- /* Mark genome-T to query-C mismatches */
- _diff = _mm_and_si128(_mm_andnot_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low));
- }
-
- /* Compare reduced C->T nts */
- _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low)));
- _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
-
- _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
- if (query_unk_mismatch_local_p) {
- _diff = _mm_or_si128(_query_flags, _diff);
- } else {
- _diff = _mm_andnot_si128(_query_flags, _diff);
- }
-
- _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
- if (genome_unk_mismatch_p) {
- _diff = _mm_or_si128(_ref_flags, _diff);
- } else {
- _diff = _mm_andnot_si128(_ref_flags, _diff);
- }
-
- return _diff;
-
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
UINT4 diff;
if (sarrayp == true) {
@@ -17350,6 +17338,42 @@ block_diff_metct (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf(" => diff %08X\n",diff));
return diff;
+
+#else
+ __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+ _query_high = _mm_load_si128((__m128i *) query_shifted);
+ _ref_high = _mm_load_si128((__m128i *) ref_ptr);
+ _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
+ _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
+
+ if (sarrayp == true) {
+ /* Ignore genome-T to query-C mismatches. Convert everything to 3-nucleotide space */
+ _diff = _mm_setzero_si128();
+ } else {
+ /* Mark genome-T to query-C mismatches */
+ _diff = _mm_and_si128(_mm_andnot_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low));
+ }
+
+ /* Compare reduced C->T nts */
+ _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low)));
+ _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+ _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
+ if (query_unk_mismatch_local_p) {
+ _diff = _mm_or_si128(_query_flags, _diff);
+ } else {
+ _diff = _mm_andnot_si128(_query_flags, _diff);
+ }
+
+ _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
+ if (genome_unk_mismatch_p) {
+ _diff = _mm_or_si128(_ref_flags, _diff);
+ } else {
+ _diff = _mm_andnot_si128(_ref_flags, _diff);
+ }
+
+ return _diff;
#endif
}
@@ -17364,39 +17388,31 @@ block_diff_metga_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
diff = 0U;
} else {
/* Mark genome-A to query-G mismatches */
-#ifdef HAVE_SSE2
- diff = (query_shifted[0] & ~(query_shifted[4])) & ~(ref_ptr[0] | ref_ptr[4]);
-#elif defined(WORDS_BIGENDIAN)
+#ifdef WORDS_BIGENDIAN
diff = (query_shifted[0] & ~(query_shifted[1])) &
~(Bigendian_convert_uint(ref_ptr[0]) | Bigendian_convert_uint(ref_ptr[4]));
-#else
+#elif !defined(HAVE_SSE2)
diff = (query_shifted[0] & ~(query_shifted[1])) & ~(ref_ptr[0] | ref_ptr[4]);
+#else
+ diff = (query_shifted[0] & ~(query_shifted[4])) & ~(ref_ptr[0] | ref_ptr[4]);
#endif
debug(printf(" => diff %08X\n",diff));
}
/* Compare reduced G->A nts */
-#ifdef HAVE_SSE2
- diff |= ((query_shifted[0] & query_shifted[4]) ^ (ref_ptr[0] & ref_ptr[4])) | (query_shifted[4] ^ ref_ptr[4]);
-#elif defined(WORDS_BIGENDIAN)
+#ifdef WORDS_BIGENDIAN
diff |= ((query_shifted[0] & query_shifted[1]) ^ (Bigendian_convert_uint(ref_ptr[0]) & Bigendian_convert_uint(ref_ptr[4]))) |
(query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#else
+#elif !defined(HAVE_SSE2)
diff |= ((query_shifted[0] & query_shifted[1]) ^ (ref_ptr[0] & ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]);
+#else
+ diff |= ((query_shifted[0] & query_shifted[4]) ^ (ref_ptr[0] & ref_ptr[4])) | (query_shifted[4] ^ ref_ptr[4]);
#endif
debug(printf(" => diff %08X\n",diff));
/* Flags: Considering N as a mismatch */
-#ifdef HAVE_SSE2
- if (query_unk_mismatch_local_p) {
- debug(printf("Marking query flags: query %08X ",query_shifted[8]));
- diff |= query_shifted[8];
- } else {
- debug(printf("Clearing query flags: query %08X ",query_shifted[8]));
- diff &= ~(query_shifted[8]);
- }
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
if (query_unk_mismatch_local_p) {
debug(printf("Marking query flags: query %08X ",query_shifted[2]));
diff |= query_shifted[2];
@@ -17404,6 +17420,14 @@ block_diff_metga_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf("Clearing query flags: query %08X ",query_shifted[2]));
diff &= ~(query_shifted[2]);
}
+#else
+ if (query_unk_mismatch_local_p) {
+ debug(printf("Marking query flags: query %08X ",query_shifted[8]));
+ diff |= query_shifted[8];
+ } else {
+ debug(printf("Clearing query flags: query %08X ",query_shifted[8]));
+ diff &= ~(query_shifted[8]);
+ }
#endif
if (genome_unk_mismatch_p) {
@@ -17432,45 +17456,8 @@ block_diff_metga_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
static Genomediff_T
block_diff_metga (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
bool query_unk_mismatch_local_p, bool sarrayp) {
-#ifdef HAVE_SSE2
- __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
-
- _query_high = _mm_load_si128((__m128i *) query_shifted);
- _ref_high = _mm_load_si128((__m128i *) ref_ptr);
- _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
- _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
-
- if (sarrayp == true) {
- /* Ignore genome-A to query-G mismatches. Convert everything to 3-nucleotide space. */
- _diff = _mm_setzero_si128();
- } else {
- /* Mark genome-A to query-G mismatches */
- _diff = _mm_andnot_si128(_query_low, _query_high);
- _diff = _mm_andnot_si128(_ref_high, _diff);
- _diff = _mm_andnot_si128(_ref_low, _diff);
- }
-
- /* Compare reduced G->A nts */
- _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low)));
- _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
- _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
- if (query_unk_mismatch_local_p) {
- _diff = _mm_or_si128(_query_flags, _diff);
- } else {
- _diff = _mm_andnot_si128(_query_flags, _diff);
- }
-
- _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
- if (genome_unk_mismatch_p) {
- _diff = _mm_or_si128(_ref_flags, _diff);
- } else {
- _diff = _mm_andnot_si128(_ref_flags, _diff);
- }
-
- return _diff;
-
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
UINT4 diff;
if (sarrayp == true) {
@@ -17524,6 +17511,44 @@ block_diff_metga (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf(" => diff %08X\n",diff));
return diff;
+
+#else
+ __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+ _query_high = _mm_load_si128((__m128i *) query_shifted);
+ _ref_high = _mm_load_si128((__m128i *) ref_ptr);
+ _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
+ _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
+
+ if (sarrayp == true) {
+ /* Ignore genome-A to query-G mismatches. Convert everything to 3-nucleotide space. */
+ _diff = _mm_setzero_si128();
+ } else {
+ /* Mark genome-A to query-G mismatches */
+ _diff = _mm_andnot_si128(_query_low, _query_high);
+ _diff = _mm_andnot_si128(_ref_high, _diff);
+ _diff = _mm_andnot_si128(_ref_low, _diff);
+ }
+
+ /* Compare reduced G->A nts */
+ _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low)));
+ _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+ _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
+ if (query_unk_mismatch_local_p) {
+ _diff = _mm_or_si128(_query_flags, _diff);
+ } else {
+ _diff = _mm_andnot_si128(_query_flags, _diff);
+ }
+
+ _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
+ if (genome_unk_mismatch_p) {
+ _diff = _mm_or_si128(_ref_flags, _diff);
+ } else {
+ _diff = _mm_andnot_si128(_ref_flags, _diff);
+ }
+
+ return _diff;
#endif
}
@@ -17654,33 +17679,41 @@ block_diff_a2iag_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
diff = 0U;
} else {
/* Mark genome-G to query-A mismatches */
-#ifdef HAVE_SSE2
- diff = ~(query_shifted[0] | query_shifted[4]) & (ref_ptr[0] & ~(ref_ptr[4]));
-#elif defined(WORDS_BIGENDIAN)
+#ifdef WORDS_BIGENDIAN
diff = ~(query_shifted[0] | query_shifted[1]) &
(Bigendian_convert_uint(ref_ptr[0]) & ~Bigendian_convert_uint(ref_ptr[4]));
-#else
+#elif !defined(HAVE_SSE2)
diff = ~(query_shifted[0] | query_shifted[1]) & (ref_ptr[0] & ~(ref_ptr[4]));
+#else
+ diff = ~(query_shifted[0] | query_shifted[4]) & (ref_ptr[0] & ~(ref_ptr[4]));
#endif
debug(printf(" => diff %08X\n",diff));
}
/* Compare reduced A->G nts */
-#ifdef HAVE_SSE2
- diff |= ((query_shifted[0] | ~(query_shifted[4])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[4] ^ ref_ptr[4]);
-#elif defined(WORDS_BIGENDIAN)
+#ifdef WORDS_BIGENDIAN
diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) | ~(Bigendian_convert_uint(ref_ptr[4])))) |
(query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#else
+#elif !defined(HAVE_SSE2)
diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[1] ^ ref_ptr[4]);
/* Because (a ^ b) = (~a ^ ~b), this is equivalent to
diff |= ((~query_shifted[0] & query_shifted[1]) ^ (~ref_ptr[0] & ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]);
*/
+#else
+ diff |= ((query_shifted[0] | ~(query_shifted[4])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[4] ^ ref_ptr[4]);
#endif
debug(printf(" => diff %08X\n",diff));
/* Flags: Considering N as a mismatch */
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ if (query_unk_mismatch_local_p) {
+ debug(printf("Marking query flags: query %08X ",query_shifted[2]));
+ diff |= query_shifted[2];
+ } else {
+ debug(printf("Clearing query flags: query %08X ",query_shifted[2]));
+ diff &= ~(query_shifted[2]);
+ }
+#else
if (query_unk_mismatch_local_p) {
debug(printf("Marking query flags: query %08X ",query_shifted[8]));
diff |= query_shifted[8];
@@ -17688,7 +17721,64 @@ block_diff_a2iag_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf("Clearing query flags: query %08X ",query_shifted[8]));
diff &= ~(query_shifted[8]);
}
+#endif
+
+ if (genome_unk_mismatch_p) {
+ debug(printf("Marking genome flags: genome %08X ",ref_ptr[8]));
+#ifdef WORDS_BIGENDIAN
+ diff |= Bigendian_convert_uint(ref_ptr[8]);
+#else
+ diff |= (ref_ptr[8]);
+#endif
+ } else {
+ debug(printf("Clearing genome flags: genome %08X ",ref_ptr[8]));
+#ifdef WORDS_BIGENDIAN
+ diff &= ~(Bigendian_convert_uint(ref_ptr[8]));
+#else
+ diff &= ~(ref_ptr[8]);
+#endif
+ }
+ debug(printf(" => diff %08X\n",diff));
+
+ return diff;
+}
+
+
+/* Convert A->G: new high = high | ~low */
+static Genomediff_T
+block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+ bool query_unk_mismatch_local_p, bool sarrayp) {
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ UINT4 diff;
+
+ if (sarrayp == true) {
+ /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. */
+ diff = 0U;
+ } else {
+ /* Mark genome-G to query-A mismatches */
+#ifdef WORDS_BIGENDIAN
+ diff = ~(query_shifted[0] | query_shifted[1]) &
+ (Bigendian_convert_uint(ref_ptr[0]) & ~Bigendian_convert_uint(ref_ptr[4]));
+#else
+ diff = ~(query_shifted[0] | query_shifted[1]) & (ref_ptr[0] & ~(ref_ptr[4]));
+#endif
+ debug(printf(" => diff %08X\n",diff));
+ }
+
+ /* Compare reduced A->G nts */
+#ifdef WORDS_BIGENDIAN
+ diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) | ~(Bigendian_convert_uint(ref_ptr[4])))) |
+ (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
#else
+ diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[1] ^ ref_ptr[4]);
+ /* Because (a ^ b) = (~a ^ ~b), this is equivalent to
+ diff |= ((~query_shifted[0] & query_shifted[1]) ^ (~ref_ptr[0] & ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]);
+ */
+#endif
+ debug(printf(" => diff %08X\n",diff));
+
+ /* Flags: Considering N as a mismatch */
if (query_unk_mismatch_local_p) {
debug(printf("Marking query flags: query %08X ",query_shifted[2]));
diff |= query_shifted[2];
@@ -17696,7 +17786,6 @@ block_diff_a2iag_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf("Clearing query flags: query %08X ",query_shifted[2]));
diff &= ~(query_shifted[2]);
}
-#endif
if (genome_unk_mismatch_p) {
debug(printf("Marking genome flags: genome %08X ",ref_ptr[8]));
@@ -17716,14 +17805,8 @@ block_diff_a2iag_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf(" => diff %08X\n",diff));
return diff;
-}
-
-/* Convert A->G: new high = high | ~low */
-static Genomediff_T
-block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool query_unk_mismatch_local_p, bool sarrayp) {
-#ifdef HAVE_SSE2
+#else
__m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
_query_high = _mm_load_si128((__m128i *) query_shifted);
@@ -17758,37 +17841,52 @@ block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
}
return _diff;
+#endif
+}
-#else
+
+static UINT4
+block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+ bool query_unk_mismatch_local_p, bool sarrayp) {
UINT4 diff;
if (sarrayp == true) {
- /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. */
+ /* Ignore genome-C to query-T mismatches */
diff = 0U;
} else {
- /* Mark genome-G to query-A mismatches */
+ /* Mark genome-C to query-T mismatches */
#ifdef WORDS_BIGENDIAN
- diff = ~(query_shifted[0] | query_shifted[1]) &
- (Bigendian_convert_uint(ref_ptr[0]) & ~Bigendian_convert_uint(ref_ptr[4]));
+ diff = (query_shifted[0] & query_shifted[1]) &
+ (~(Bigendian_convert_uint(ref_ptr[0])) & Bigendian_convert_uint(ref_ptr[4]));
+#elif !defined(HAVE_SSE2)
+ diff = (query_shifted[0] & query_shifted[1]) & (~(ref_ptr[0]) & ref_ptr[4]);
#else
- diff = ~(query_shifted[0] | query_shifted[1]) & (ref_ptr[0] & ~(ref_ptr[4]));
+ diff = (query_shifted[0] & query_shifted[4]) & (~(ref_ptr[0]) & ref_ptr[4]);
#endif
debug(printf(" => diff %08X\n",diff));
}
- /* Compare reduced A->G nts */
+ /* Compare reduced T->C nts */
#ifdef WORDS_BIGENDIAN
- diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) | ~(Bigendian_convert_uint(ref_ptr[4])))) |
+ diff |= ((query_shifted[0] & ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) & ~(Bigendian_convert_uint(ref_ptr[4])))) |
(query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
+#elif !defined(HAVE_SSE2)
+ diff |= ((query_shifted[0] & ~(query_shifted[1])) ^ (ref_ptr[0] & ~(ref_ptr[4]))) | (query_shifted[1] ^ ref_ptr[4]);
#else
- diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[1] ^ ref_ptr[4]);
- /* Because (a ^ b) = (~a ^ ~b), this is equivalent to
- diff |= ((~query_shifted[0] & query_shifted[1]) ^ (~ref_ptr[0] & ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]);
- */
+ diff |= ((query_shifted[0] & ~(query_shifted[4])) ^ (ref_ptr[0] & ~(ref_ptr[4]))) | (query_shifted[4] ^ ref_ptr[4]);
#endif
debug(printf(" => diff %08X\n",diff));
/* Flags: Considering N as a mismatch */
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ if (query_unk_mismatch_local_p) {
+ debug(printf("Marking query flags: query %08X ",query_shifted[8]));
+ diff |= query_shifted[8];
+ } else {
+ debug(printf("Clearing query flags: query %08X ",query_shifted[8]));
+ diff &= ~(query_shifted[8]);
+ }
+#else
if (query_unk_mismatch_local_p) {
debug(printf("Marking query flags: query %08X ",query_shifted[2]));
diff |= query_shifted[2];
@@ -17796,6 +17894,7 @@ block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf("Clearing query flags: query %08X ",query_shifted[2]));
diff &= ~(query_shifted[2]);
}
+#endif
if (genome_unk_mismatch_p) {
debug(printf("Marking genome flags: genome %08X ",ref_ptr[8]));
@@ -17815,13 +17914,15 @@ block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf(" => diff %08X\n",diff));
return diff;
-#endif
}
-static UINT4
-block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool query_unk_mismatch_local_p, bool sarrayp) {
+/* Convert T->C: new high = high & ~low */
+static Genomediff_T
+block_diff_a2itc (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+ bool query_unk_mismatch_local_p, bool sarrayp) {
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
UINT4 diff;
if (sarrayp == true) {
@@ -17829,9 +17930,7 @@ block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
diff = 0U;
} else {
/* Mark genome-C to query-T mismatches */
-#ifdef HAVE_SSE2
- diff = (query_shifted[0] & query_shifted[4]) & (~(ref_ptr[0]) & ref_ptr[4]);
-#elif defined(WORDS_BIGENDIAN)
+#ifdef WORDS_BIGENDIAN
diff = (query_shifted[0] & query_shifted[1]) &
(~(Bigendian_convert_uint(ref_ptr[0])) & Bigendian_convert_uint(ref_ptr[4]));
#else
@@ -17841,9 +17940,7 @@ block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
}
/* Compare reduced T->C nts */
-#ifdef HAVE_SSE2
- diff |= ((query_shifted[0] & ~(query_shifted[4])) ^ (ref_ptr[0] & ~(ref_ptr[4]))) | (query_shifted[4] ^ ref_ptr[4]);
-#elif defined(WORDS_BIGENDIAN)
+#ifdef WORDS_BIGENDIAN
diff |= ((query_shifted[0] & ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) & ~(Bigendian_convert_uint(ref_ptr[4])))) |
(query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
#else
@@ -17852,7 +17949,6 @@ block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf(" => diff %08X\n",diff));
/* Flags: Considering N as a mismatch */
-#ifdef HAVE_SSE2
if (query_unk_mismatch_local_p) {
debug(printf("Marking query flags: query %08X ",query_shifted[2]));
diff |= query_shifted[2];
@@ -17860,15 +17956,6 @@ block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf("Clearing query flags: query %08X ",query_shifted[2]));
diff &= ~(query_shifted[2]);
}
-#else
- if (query_unk_mismatch_local_p) {
- debug(printf("Marking query flags: query %08X ",query_shifted[8]));
- diff |= query_shifted[8];
- } else {
- debug(printf("Clearing query flags: query %08X ",query_shifted[8]));
- diff &= ~(query_shifted[8]);
- }
-#endif
if (genome_unk_mismatch_p) {
debug(printf("Marking genome flags: genome %08X ",ref_ptr[8]));
@@ -17888,14 +17975,8 @@ block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
debug(printf(" => diff %08X\n",diff));
return diff;
-}
-
-/* Convert T->C: new high = high & ~low */
-static Genomediff_T
-block_diff_a2itc (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
- bool query_unk_mismatch_local_p, bool sarrayp) {
-#ifdef HAVE_SSE2
+#else
__m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
_query_high = _mm_load_si128((__m128i *) query_shifted);
@@ -17930,171 +18011,233 @@ block_diff_a2itc (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
}
return _diff;
+#endif
+}
-#else
- UINT4 diff;
- if (sarrayp == true) {
- /* Ignore genome-C to query-T mismatches */
- diff = 0U;
+static UINT4
+block_diff_atoi_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+ if (genestrand == +2) {
+ if (plusp) {
+ return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ }
} else {
- /* Mark genome-C to query-T mismatches */
-#ifdef WORDS_BIGENDIAN
- diff = (query_shifted[0] & query_shifted[1]) &
- (~(Bigendian_convert_uint(ref_ptr[0])) & Bigendian_convert_uint(ref_ptr[4]));
-#else
- diff = (query_shifted[0] & query_shifted[1]) & (~(ref_ptr[0]) & ref_ptr[4]);
-#endif
- debug(printf(" => diff %08X\n",diff));
+ if (plusp) {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ }
}
+}
- /* Compare reduced T->C nts */
-#ifdef WORDS_BIGENDIAN
- diff |= ((query_shifted[0] & ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) & ~(Bigendian_convert_uint(ref_ptr[4])))) |
- (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#else
- diff |= ((query_shifted[0] & ~(query_shifted[1])) ^ (ref_ptr[0] & ~(ref_ptr[4]))) | (query_shifted[1] ^ ref_ptr[4]);
-#endif
- debug(printf(" => diff %08X\n",diff));
- /* Flags: Considering N as a mismatch */
- if (query_unk_mismatch_local_p) {
- debug(printf("Marking query flags: query %08X ",query_shifted[2]));
- diff |= query_shifted[2];
+static Genomediff_T
+block_diff_atoi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+ if (genestrand == +2) {
+ if (plusp) {
+ return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ }
} else {
- debug(printf("Clearing query flags: query %08X ",query_shifted[2]));
- diff &= ~(query_shifted[2]);
+ if (plusp) {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ }
}
+}
- if (genome_unk_mismatch_p) {
- debug(printf("Marking genome flags: genome %08X ",ref_ptr[8]));
-#ifdef WORDS_BIGENDIAN
- diff |= Bigendian_convert_uint(ref_ptr[8]);
-#else
- diff |= (ref_ptr[8]);
-#endif
+static UINT4
+block_diff_atoi_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+ if (genestrand == +2) {
+ if (plusp) {
+ return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ }
} else {
- debug(printf("Clearing genome flags: genome %08X ",ref_ptr[8]));
-#ifdef WORDS_BIGENDIAN
- diff &= ~(Bigendian_convert_uint(ref_ptr[8]));
-#else
- diff &= ~(ref_ptr[8]);
-#endif
+ if (plusp) {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ }
}
- debug(printf(" => diff %08X\n",diff));
+}
- return diff;
-#endif
+static Genomediff_T
+block_diff_atoi_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+ if (genestrand == +2) {
+ if (plusp) {
+ return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ }
+ } else {
+ if (plusp) {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ }
+ }
+}
+
+/* Ignores snp_ptr */
+static UINT4
+block_diff_atoi_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+ if (genestrand == +2) {
+ if (plusp) {
+ return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ }
+ } else {
+ if (plusp) {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ }
+ }
}
+/* Ignores snp_ptr */
+static Genomediff_T
+block_diff_atoi_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+ bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+ if (genestrand == +2) {
+ if (plusp) {
+ return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ }
+ } else {
+ if (plusp) {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ }
+ }
+}
+
+
+/************************************************************************
+ * TTOC
+ ************************************************************************/
static UINT4
-block_diff_atoi_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+block_diff_ttoc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
if (plusp) {
- return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
if (plusp) {
- return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
}
}
static Genomediff_T
-block_diff_atoi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+block_diff_ttoc (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
if (plusp) {
- return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
if (plusp) {
- return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
}
}
static UINT4
-block_diff_atoi_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+block_diff_ttoc_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
if (plusp) {
- return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
- } else {
return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
}
} else {
if (plusp) {
- return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
- } else {
return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
}
}
}
static Genomediff_T
-block_diff_atoi_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+block_diff_ttoc_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
if (plusp) {
- return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
- } else {
return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
}
} else {
if (plusp) {
- return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
- } else {
return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+ } else {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
}
}
}
/* Ignores snp_ptr */
static UINT4
-block_diff_atoi_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+block_diff_ttoc_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
if (plusp) {
- return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
if (plusp) {
- return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
}
}
/* Ignores snp_ptr */
static Genomediff_T
-block_diff_atoi_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+block_diff_ttoc_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
if (genestrand == +2) {
if (plusp) {
- return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
} else {
if (plusp) {
- return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
- } else {
return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+ } else {
+ return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
}
}
}
@@ -18117,7 +18260,9 @@ static Diffproc_snp_32_T block_diff_snp_32;
static Diffproc_T block_diff_sarray;
static Diffproc_32_T block_diff_sarray_32;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+/* Skip */
+#else
static __m128i _BOUND_HIGH;
static __m128i _BOUND_LOW;
#endif
@@ -18126,7 +18271,9 @@ void
Genome_hr_setup (Genomecomp_T *ref_blocks_in, Genomecomp_T *snp_blocks_in,
bool query_unk_mismatch_p_in, bool genome_unk_mismatch_p_in,
Mode_T mode) {
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+/* Skip */
+#else
_BOUND_HIGH = _mm_set_epi32(128,96,64,32);
_BOUND_LOW = _mm_set_epi32(96,64,32,0);
#endif
@@ -18155,6 +18302,12 @@ Genome_hr_setup (Genomecomp_T *ref_blocks_in, Genomecomp_T *snp_blocks_in,
block_diff_32 = block_diff_atoi_32;
block_diff_sarray_32 = block_diff_atoi_sarray_32;
break;
+ case TTOC_STRANDED: case TTOC_NONSTRANDED:
+ block_diff = block_diff_ttoc;
+ block_diff_sarray = block_diff_ttoc_sarray;
+ block_diff_32 = block_diff_ttoc_32;
+ block_diff_sarray_32 = block_diff_ttoc_sarray_32;
+ break;
default: fprintf(stderr,"Mode %d not recognized\n",mode); abort();
}
@@ -18175,6 +18328,10 @@ Genome_hr_setup (Genomecomp_T *ref_blocks_in, Genomecomp_T *snp_blocks_in,
block_diff_snp = block_diff_atoi_snp;
block_diff_snp_32 = block_diff_atoi_snp_32;
break;
+ case TTOC_STRANDED: case TTOC_NONSTRANDED:
+ block_diff_snp = block_diff_ttoc_snp;
+ block_diff_snp_32 = block_diff_ttoc_snp_32;
+ break;
default: fprintf(stderr,"Mode %d not recognized\n",mode); abort();
}
#endif
@@ -18205,6 +18362,10 @@ Genome_hr_user_setup (UINT4 *ref_blocks_in,
block_diff = block_diff_atoi;
block_diff_32 = block_diff_atoi_32;
break;
+ case TTOC_STRANDED: case TTOC_NONSTRANDED:
+ block_diff = block_diff_ttoc;
+ block_diff_32 = block_diff_ttoc_32;
+ break;
default: fprintf(stderr,"Mode %d not recognized\n",mode); abort();
}
@@ -18225,6 +18386,10 @@ Genome_hr_user_setup (UINT4 *ref_blocks_in,
block_diff_snp = block_diff_atoi_snp;
block_diff_snp_32 = block_diff_atoi_snp_32;
break;
+ case TTOC_STRANDED: case TTOC_NONSTRANDED:
+ block_diff_snp = block_diff_ttoc_snp;
+ block_diff_snp_32 = block_diff_ttoc_snp_32;
+ break;
default: fprintf(stderr,"Mode %d not recognized\n",mode); abort();
}
#endif
@@ -18251,7 +18416,72 @@ Genome_hr_user_setup (UINT4 *ref_blocks_in,
#define set_end_mask(enddiscard) (~0U << enddiscard)
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+
+#define nonzero_p(diff) diff
+
+#define clear_start(diff,startdiscard) (diff & (~0U << (startdiscard)))
+#define clear_end(diff,enddiscard) (diff & ~(~0U << (enddiscard)))
+
+/* Same speed: clear_highbit(diff,relpos) (diff - (HIGH_BIT >> relpos)) */
+/* Note: xor assumes that bit at relpos was on */
+#define clear_highbit(diff,relpos) (diff ^ (HIGH_BIT >> relpos))
+
+/* Slower: clear_lowbit(diff,relpos) diff -= (1 << relpos) */
+#define clear_lowbit(diff,relpos) (diff & (diff - 1));
+
+
+#ifdef HAVE_POPCNT
+#define popcount_ones(diff) (_popcnt32(diff))
+#elif defined(HAVE_MM_POPCNT)
+#define popcount_ones(diff) (_mm_popcnt_u32(diff))
+#elif defined(HAVE_BUILTIN_POPCOUNT)
+#define popcount_ones(diff) (__builtin_popcount(diff))
+#else
+#define popcount_ones(diff) (count_bits[diff & 0x0000FFFF] + count_bits[diff >> 16])
+#endif
+
+
+#ifdef HAVE_LZCNT
+#define count_leading_zeroes(diff) _lzcnt_u32(diff)
+#elif defined(HAVE_BUILTIN_CLZ)
+#define count_leading_zeroes(diff) __builtin_clz(diff)
+#else
+#define count_leading_zeroes(diff) ((diff >> 16) ? clz_table[diff >> 16] : 16 + clz_table[diff])
+#endif
+
+#ifdef HAVE_TZCNT
+#define count_trailing_zeroes(diff) _tzcnt_u32(diff)
+#elif defined(HAVE_BUILTIN_CTZ)
+#define count_trailing_zeroes(diff) __builtin_ctz(diff)
+#else
+/* lowbit = -diff & diff */
+#define count_trailing_zeroes(diff) mod_37_bit_position[(-diff & diff) % 37]
+#endif
+
+/* For trimming */
+#define set_start(diff,startdiscard) (diff | ~(~0U << startdiscard))
+#define set_end(diff,enddiscard) (diff | (~0U << enddiscard))
+
+static void
+print_diff_popcount (UINT4 diff) {
+ printf("diff: %08X => nmismatches %d\n",diff,popcount_ones(diff));
+ return;
+}
+
+static void
+print_diff_trailing_zeroes (UINT4 diff, int offset) {
+ printf("diff: %08X => offset %d + trailing zeroes %d\n",diff,offset,count_trailing_zeroes(diff));
+ return;
+}
+
+static void
+print_diff_leading_zeroes (UINT4 diff, int offset) {
+ printf("diff: %08X => offset %d - leading zeroes %d\n",diff,offset,count_leading_zeroes(diff));
+ return;
+}
+
+#else /* littleendian and SSE2 */
#ifdef HAVE_SSE4_1
#define nonzero_p(diff) !_mm_testz_si128(diff,diff)
@@ -18625,72 +18855,7 @@ print_diff_leading_zeroes (__m128i _diff, int offset) {
}
#endif
-#else
-
-#define nonzero_p(diff) diff
-
-#define clear_start(diff,startdiscard) (diff & (~0U << (startdiscard)))
-#define clear_end(diff,enddiscard) (diff & ~(~0U << (enddiscard)))
-
-/* Same speed: clear_highbit(diff,relpos) (diff - (HIGH_BIT >> relpos)) */
-/* Note: xor assumes that bit at relpos was on */
-#define clear_highbit(diff,relpos) (diff ^ (HIGH_BIT >> relpos))
-
-/* Slower: clear_lowbit(diff,relpos) diff -= (1 << relpos) */
-#define clear_lowbit(diff,relpos) (diff & (diff - 1));
-
-
-#ifdef HAVE_POPCNT
-#define popcount_ones(diff) (_popcnt32(diff))
-#elif defined(HAVE_MM_POPCNT)
-#define popcount_ones(diff) (_mm_popcnt_u32(diff))
-#elif defined(HAVE_BUILTIN_POPCOUNT)
-#define popcount_ones(diff) (__builtin_popcount(diff))
-#else
-#define popcount_ones(diff) (count_bits[diff & 0x0000FFFF] + count_bits[diff >> 16])
-#endif
-
-
-#ifdef HAVE_LZCNT
-#define count_leading_zeroes(diff) _lzcnt_u32(diff)
-#elif defined(HAVE_BUILTIN_CLZ)
-#define count_leading_zeroes(diff) __builtin_clz(diff)
-#else
-#define count_leading_zeroes(diff) ((diff >> 16) ? clz_table[diff >> 16] : 16 + clz_table[diff])
-#endif
-
-#ifdef HAVE_TZCNT
-#define count_trailing_zeroes(diff) _tzcnt_u32(diff)
-#elif defined(HAVE_BUILTIN_CTZ)
-#define count_trailing_zeroes(diff) __builtin_ctz(diff)
-#else
-/* lowbit = -diff & diff */
-#define count_trailing_zeroes(diff) mod_37_bit_position[(-diff & diff) % 37]
-#endif
-
-/* For trimming */
-#define set_start(diff,startdiscard) (diff | ~(~0U << startdiscard))
-#define set_end(diff,enddiscard) (diff | (~0U << enddiscard))
-
-static void
-print_diff_popcount (UINT4 diff) {
- printf("diff: %08X => nmismatches %d\n",diff,popcount_ones(diff));
- return;
-}
-
-static void
-print_diff_trailing_zeroes (UINT4 diff, int offset) {
- printf("diff: %08X => offset %d + trailing zeroes %d\n",diff,offset,count_trailing_zeroes(diff));
- return;
-}
-
-static void
-print_diff_leading_zeroes (UINT4 diff, int offset) {
- printf("diff: %08X => offset %d - leading zeroes %d\n",diff,offset,count_leading_zeroes(diff));
- return;
-}
-
-#endif
+#endif /* littleendian and SSE2 */
#define nonzero_p_32(diff) diff
@@ -18787,12 +18952,13 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
- diff_32 = (block_diff_sarray_32)(query_shifted
-#ifdef HAVE_SSE2
- + startcolumni
-#endif
- ,&(ref_blocks[startblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_sarray_32)(query_shifted,&(ref_blocks[startblocki_32]),
+ plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+#else
+ diff_32 = (block_diff_sarray_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -18818,8 +18984,7 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -18835,7 +19000,8 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (endblocki == startblocki) {
diff = (block_diff_sarray)(query_shifted,&(ref_blocks[startblocki]),
plusp,genestrand,/*query_unk_mismatch_local_p*/true);
@@ -18869,11 +19035,11 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr = &(ref_blocks[startblocki+12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr = &(ref_blocks[startblocki]);
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr = &(ref_blocks[startblocki+12]);
#endif
end = &(ref_blocks[endblocki]);
offset += STEP_SIZE; /* 128 or 32 */
@@ -18888,10 +19054,10 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr += 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr += 12;
#endif
offset += STEP_SIZE; /* 128 or 32 */
}
@@ -18911,7 +19077,8 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
return (pos3 - pos5);
}
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -18966,13 +19133,13 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
offset = (pos3 - 1) - enddiscard + 32;
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
- diff_32 = (block_diff_sarray_32)(query_shifted
-#ifdef HAVE_SSE2
- + endcolumni
-#endif
- ,&(ref_blocks[endblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_sarray_32)(query_shifted,&(ref_blocks[endblocki_32]),
+ plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+#else
+ diff_32 = (block_diff_sarray_32)(query_shifted + endcolumni,&(ref_blocks[endblocki_32]),
plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -18998,8 +19165,7 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -19015,7 +19181,8 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (startblocki == endblocki) {
diff = (block_diff_sarray)(query_shifted,&(ref_blocks[endblocki]),
plusp,genestrand,/*query_unk_mismatch_local_p*/true);
@@ -19049,11 +19216,11 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
}
query_shifted -= COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr = &(ref_blocks[endblocki-12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr = &(ref_blocks[endblocki]);
ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
+#else
+ ptr = &(ref_blocks[endblocki-12]);
#endif
start = &(ref_blocks[startblocki]);
offset -= STEP_SIZE; /* 128 or 32 */
@@ -19068,10 +19235,10 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
}
query_shifted -= COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr -= 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
+#else
+ ptr -= 12;
#endif
offset -= STEP_SIZE; /* 128 or 32 */
}
@@ -19091,7 +19258,8 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
return (pos3 - pos5);
}
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -19162,14 +19330,24 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
ptr1 = &(ref_blocks[startblocki_1]);
ptr2 = &(ref_blocks[startblocki_2]);
+#ifdef WORDS_BIGENDIAN
+ shifted1[0] = Bigendian_convert_uint(ptr1[0]) << nshift;
+ shifted1[1] = Bigendian_convert_uint(ptr1[4]) << nshift;
+ shifted1[2] = Bigendian_convert_uint(ptr1[8]) << nshift;
+#else
shifted1[0] = ptr1[0] << nshift;
shifted1[1] = ptr1[4] << nshift;
shifted1[2] = ptr1[8] << nshift;
+#endif
debug2(Compress_print_one_block(ptr1));
debug2(Compress_print_one_block(ptr2));
debug2(Compress_print_one_block(shifted1));
+#ifdef WORDS_BIGENDIAN
+ diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
+#else
diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
+#endif
diff = clear_start_32(diff,startdiscard);
diff = clear_end_32(diff,enddiscard);
@@ -19195,14 +19373,24 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
/* Block 1 */
ptr1 = &(ref_blocks[startblocki_1]);
ptr2 = &(ref_blocks[startblocki_2]);
+#ifdef WORDS_BIGENDIAN
+ shifted1[0] = Bigendian_convert_uint(ptr1[0]) << nshift;
+ shifted1[1] = Bigendian_convert_uint(ptr1[4]) << nshift;
+ shifted1[2] = Bigendian_convert_uint(ptr1[8]) << nshift;
+#else
shifted1[0] = ptr1[0] << nshift;
shifted1[1] = ptr1[4] << nshift;
shifted1[2] = ptr1[8] << nshift;
+#endif
debug2(Compress_print_one_block(ptr1));
debug2(Compress_print_one_block(ptr2));
debug2(Compress_print_one_block(shifted1));
+#ifdef WORDS_BIGENDIAN
+ diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
+#else
diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
+#endif
diff = clear_start_32(diff,startdiscard);
if (diff /* != 0U */) {
@@ -19223,17 +19411,31 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
/* Block 2 */
if (nshift == 0) {
/* rightshift of 32 is a no-op */
+#ifdef WORDS_BIGENDIAN
+ shifted1[0] = Bigendian_convert_uint(ptr1[0]); shifted1[1] = Bigendian_convert_uint(ptr1[4]); shifted1[2] = Bigendian_convert_uint(ptr1[8]);
+#else
shifted1[0] = ptr1[0]; shifted1[1] = ptr1[4]; shifted1[2] = ptr1[8];
+#endif
} else {
+#ifdef WORDS_BIGENDIAN
+ shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift) | (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift);
+ shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift) | (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift);
+ shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift) | (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift);
+#else
shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift);
shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift);
shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift);
+#endif
}
debug2(Compress_print_one_block(ptr1));
debug2(Compress_print_one_block(ptr2));
debug2(Compress_print_one_block(shifted1));
+#ifdef WORDS_BIGENDIAN
+ diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
+#else
diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
+#endif
diff = clear_end_32(diff,enddiscard);
if (diff /* != 0U */) {
@@ -19256,14 +19458,24 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
ptr1 = &(ref_blocks[startblocki_1]);
ptr2 = &(ref_blocks[startblocki_2]);
+#ifdef WORDS_BIGENDIAN
+ shifted1[0] = Bigendian_convert_uint(ptr1[0]) << nshift;
+ shifted1[1] = Bigendian_convert_uint(ptr1[4]) << nshift;
+ shifted1[2] = Bigendian_convert_uint(ptr1[8]) << nshift;
+#else
shifted1[0] = ptr1[0] << nshift;
shifted1[1] = ptr1[4] << nshift;
shifted1[2] = ptr1[8] << nshift;
+#endif
debug2(Compress_print_one_block(ptr1));
debug2(Compress_print_one_block(ptr2));
debug2(Compress_print_one_block(shifted1));
+#ifdef WORDS_BIGENDIAN
+ diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
+#else
diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
+#endif
diff = clear_start_32(diff,startdiscard);
diff = clear_end_32(diff,enddiscard);
@@ -19285,14 +19497,24 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
/* Startblock */
ptr1 = &(ref_blocks[startblocki_1]);
ptr2 = &(ref_blocks[startblocki_2]);
+#ifdef WORDS_BIGENDIAN
+ shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift);
+ shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift);
+ shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift);
+#else
shifted1[0] = (ptr1[0] << nshift);
shifted1[1] = (ptr1[4] << nshift);
shifted1[2] = (ptr1[8] << nshift);
+#endif
debug2(Compress_print_one_block(ptr1));
debug2(Compress_print_one_block(ptr2));
debug2(Compress_print_one_block(shifted1));
+#ifdef WORDS_BIGENDIAN
+ diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
+#else
diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
+#endif
diff = clear_start_32(diff,startdiscard);
if (diff /* != 0U */) {
@@ -19313,17 +19535,31 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
while (ptr1 < end && ptr2 < end) {
if (nshift == 0) {
/* rightshift of 32 is a no-op */
+#ifdef WORDS_BIGENDIAN
+ shifted1[0] = Bigendian_convert_uint(ptr1[0]); shifted1[1] = Bigendian_convert_uint(ptr1[4]); shifted1[2] = Bigendian_convert_uint(ptr1[8]);
+#else
shifted1[0] = ptr1[0]; shifted1[1] = ptr1[4]; shifted1[2] = ptr1[8];
+#endif
} else {
+#ifdef WORDS_BIGENDIAN
+ shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift) | (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift);
+ shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift) | (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift);
+ shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift) | (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift);
+#else
shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift);
shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift);
shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift);
+#endif
}
debug2(Compress_print_one_block(ptr1));
debug2(Compress_print_one_block(ptr2));
debug2(Compress_print_one_block(shifted1));
+#ifdef WORDS_BIGENDIAN
+ diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
+#else
diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
+#endif
if (diff /* != 0U */) {
#ifdef HAVE_BUILTIN_CTZ
mismatch_position = offset + (relpos = __builtin_ctz(diff));
@@ -19363,17 +19599,31 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
/* Block 1 */
if (nshift == 0) {
/* rightshift of 32 is a no-op */
+#ifdef WORDS_BIGENDIAN
+ shifted1[0] = Bigendian_convert_uint(ptr1[0]); shifted1[1] = Bigendian_convert_uint(ptr1[4]); shifted1[2] = Bigendian_convert_uint(ptr1[8]);
+#else
shifted1[0] = ptr1[0]; shifted1[1] = ptr1[4]; shifted1[2] = ptr1[8];
+#endif
} else {
+#ifdef WORDS_BIGENDIAN
+ shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift) | (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift);
+ shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift) | (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift);
+ shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift) | (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift);
+#else
shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift);
shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift);
shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift);
+#endif
}
debug2(Compress_print_one_block(ptr1));
debug2(Compress_print_one_block(ptr2));
debug2(Compress_print_one_block(shifted1));
+#ifdef WORDS_BIGENDIAN
+ diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
+#else
diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
+#endif
if (nblocks == 1) {
diff = clear_end_32(diff,enddiscard);
}
@@ -19397,14 +19647,24 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
}
/* Block 2 */
+#ifdef WORDS_BIGENDIAN
+ shifted1[0] = (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift);
+ shifted1[1] = (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift);
+ shifted1[2] = (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift);
+#else
shifted1[0] = (ptr1_prev[0] >> rightshift);
shifted1[1] = (ptr1_prev[4] >> rightshift);
shifted1[2] = (ptr1_prev[8] >> rightshift);
+#endif
debug2(Compress_print_one_block(ptr1));
debug2(Compress_print_one_block(ptr2));
debug2(Compress_print_one_block(shifted1));
+#ifdef WORDS_BIGENDIAN
+ diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
+#else
diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
+#endif
diff = clear_end_32(diff,enddiscard);
if (diff /* != 0U */) {
@@ -19474,12 +19734,13 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
- diff_32 = (block_diff_32)(query_shifted
-#ifdef HAVE_SSE2
- + startcolumni
-#endif
- ,&(ref_blocks[startblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]),
+ plusp,genestrand,query_unk_mismatch_p);
+#else
+ diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
plusp,genestrand,query_unk_mismatch_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -19494,8 +19755,7 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -19509,7 +19769,8 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
}
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (endblocki == startblocki) {
debug(printf("** Single block **\n"));
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
@@ -19585,11 +19846,11 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
/* 2..(n-1) / n: Check all middle blocks first */
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr = &(ref_blocks[startblocki+12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr = &(ref_blocks[startblocki]);
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr = &(ref_blocks[startblocki+12]);
#endif
endblock = &(ref_blocks[endblocki]);
nmismatches = 0;
@@ -19604,10 +19865,10 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr += 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr += 12;
#endif
}
@@ -19658,7 +19919,8 @@ count_mismatches_limit (Compress_T query_compress, Univcoord_T left,
return nmismatches + popcount_ones(diff);
}
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -19714,12 +19976,13 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
- diff_32 = (block_diff_snp_32)(query_shifted
-#ifdef HAVE_SSE2
- + startcolumni
-#endif
- ,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
plusp,genestrand,query_unk_mismatch_p);
+#else
+ diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
+ plusp,genestrand,query_unk_mismatch_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -19734,8 +19997,7 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -19749,7 +20011,8 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
}
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (endblocki == startblocki) {
debug(printf("** Single block **\n"));
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
@@ -19827,13 +20090,13 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
/* 2..(n-1) / n: Check all middle blocks first */
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr = &(ref_blocks[startblocki+12]);
- alt_ptr = &(snp_blocks[startblocki+12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr = &(ref_blocks[startblocki]);
alt_ptr = &(snp_blocks[startblocki]);
ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#else
+ ref_ptr = &(ref_blocks[startblocki+12]);
+ alt_ptr = &(snp_blocks[startblocki+12]);
#endif
endblock = &(ref_blocks[endblocki]);
nmismatches = 0;
@@ -19848,10 +20111,10 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr += 12; alt_ptr += 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#else
+ ref_ptr += 12; alt_ptr += 12;
#endif
}
@@ -19902,7 +20165,8 @@ count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int po
return nmismatches + popcount_ones(diff);
}
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -19978,13 +20242,13 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
enddiscard = (left+pos3) % 32;
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
- diff_32 = (block_diff_32)(query_shifted
-#ifdef HAVE_SSE2
- + startcolumni
-#endif
- ,&(ref_blocks[startblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]),
+ plusp,genestrand,query_unk_mismatch_p);
+#else
+ diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
plusp,genestrand,query_unk_mismatch_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -19999,8 +20263,7 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -20015,7 +20278,8 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (endblocki == startblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
plusp,genestrand,query_unk_mismatch_p);
@@ -20038,11 +20302,11 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
nmismatches = popcount_ones(diff);
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr = &(ref_blocks[startblocki+12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr = &(ref_blocks[startblocki]);
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr = &(ref_blocks[startblocki+12]);
#endif
end = &(ref_blocks[endblocki]);
while (ptr < end) {
@@ -20052,10 +20316,10 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
nmismatches += popcount_ones(diff);
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr += 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr += 12;
#endif
}
@@ -20067,7 +20331,8 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
return nmismatches + popcount_ones(diff);
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -20119,13 +20384,13 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
enddiscard = (left+pos3) % 32;
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
- diff_32 = (block_diff_snp_32)(query_shifted
-#ifdef HAVE_SSE2
- + startcolumni
-#endif
- ,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
+ plusp,genestrand,query_unk_mismatch_p);
+#else
+ diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
plusp,genestrand,query_unk_mismatch_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -20140,8 +20405,7 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -20156,7 +20420,8 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (endblocki == startblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
plusp,genestrand,query_unk_mismatch_p);
@@ -20179,13 +20444,13 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
nmismatches = popcount_ones(diff);
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr = &(ref_blocks[startblocki+12]);
- alt_ptr = &(snp_blocks[startblocki+12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr = &(ref_blocks[startblocki]);
alt_ptr = &(snp_blocks[startblocki]);
ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#else
+ ref_ptr = &(ref_blocks[startblocki+12]);
+ alt_ptr = &(snp_blocks[startblocki+12]);
#endif
end = &(ref_blocks[endblocki]);
while (ref_ptr < end) {
@@ -20195,10 +20460,10 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
nmismatches += popcount_ones(diff);
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr += 12; alt_ptr += 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#else
+ ref_ptr += 12; alt_ptr += 12;
#endif
}
@@ -20210,7 +20475,8 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
return nmismatches + popcount_ones(diff);
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -20409,13 +20675,13 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
offset = -startdiscard + pos5;
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
- diff_32 = (block_diff_32)(query_shifted
-#ifdef HAVE_SSE2
- + startcolumni
-#endif
- ,&(ref_blocks[startblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]),
plusp,genestrand,query_unk_mismatch_local_p);
+#else
+ diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
+ plusp,genestrand,query_unk_mismatch_local_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -20436,8 +20702,7 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -20453,7 +20718,8 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (endblocki == startblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
plusp,genestrand,query_unk_mismatch_local_p);
@@ -20487,11 +20753,11 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr = &(ref_blocks[startblocki+12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr = &(ref_blocks[startblocki]);
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr = &(ref_blocks[startblocki+12]);
#endif
end = &(ref_blocks[endblocki]);
offset += STEP_SIZE; /* 128 or 32 */
@@ -20509,10 +20775,10 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr += 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr += 12;
#endif
offset += STEP_SIZE; /* 128 or 32 */
}
@@ -20529,7 +20795,8 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
return nmismatches;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
@@ -20584,13 +20851,13 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
offset = -startdiscard + pos5;
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
- diff_32 = (block_diff_snp_32)(query_shifted
-#ifdef HAVE_SSE2
- + startcolumni
-#endif
- ,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
plusp,genestrand,query_unk_mismatch_local_p);
+#else
+ diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
+ plusp,genestrand,query_unk_mismatch_local_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -20610,8 +20877,7 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -20627,7 +20893,8 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (endblocki == startblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
plusp,genestrand,query_unk_mismatch_local_p);
@@ -20661,13 +20928,13 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr = &(ref_blocks[startblocki+12]);
- alt_ptr = &(snp_blocks[startblocki+12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr = &(ref_blocks[startblocki]);
alt_ptr = &(snp_blocks[startblocki]);
ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#else
+ ref_ptr = &(ref_blocks[startblocki+12]);
+ alt_ptr = &(snp_blocks[startblocki+12]);
#endif
end = &(ref_blocks[endblocki]);
offset += STEP_SIZE; /* 128 or 32 */
@@ -20685,10 +20952,10 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr += 12; alt_ptr += 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#else
+ ref_ptr += 12; alt_ptr += 12;
#endif
offset += STEP_SIZE; /* 128 or 32 */
}
@@ -20705,7 +20972,8 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both));
return nmismatches_both;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -20847,13 +21115,13 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
offset = (pos3 - 1) - enddiscard + 32;
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
- diff_32 = (block_diff_32)(query_shifted
-#ifdef HAVE_SSE2
- + endcolumni
-#endif
- ,&(ref_blocks[endblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[endblocki_32]),
+ plusp,genestrand,query_unk_mismatch_local_p);
+#else
+ diff_32 = (block_diff_32)(query_shifted + endcolumni,&(ref_blocks[endblocki_32]),
plusp,genestrand,query_unk_mismatch_local_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -20873,8 +21141,7 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -20891,7 +21158,8 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (startblocki == endblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
plusp,genestrand,query_unk_mismatch_local_p);
@@ -20925,11 +21193,11 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
}
query_shifted -= COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr = &(ref_blocks[endblocki-12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr = &(ref_blocks[endblocki]);
ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
+#else
+ ptr = &(ref_blocks[endblocki-12]);
#endif
start = &(ref_blocks[startblocki]);
offset -= STEP_SIZE; /* 128 or 32 */
@@ -20947,10 +21215,10 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
}
query_shifted -= COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr -= 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
+#else
+ ptr -= 12;
#endif
offset -= STEP_SIZE; /* 128 or 32 */
}
@@ -20968,7 +21236,8 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches));
return nmismatches;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -21024,13 +21293,13 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
offset = (pos3 - 1) - enddiscard + 32;
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
- diff_32 = (block_diff_snp_32)(query_shifted
-#ifdef HAVE_SSE2
- + endcolumni
-#endif
- ,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
+ plusp,genestrand,query_unk_mismatch_local_p);
+#else
+ diff_32 = (block_diff_snp_32)(query_shifted + endcolumni,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
plusp,genestrand,query_unk_mismatch_local_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -21050,8 +21319,7 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -21067,7 +21335,8 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (startblocki == endblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
plusp,genestrand,query_unk_mismatch_local_p);
@@ -21101,13 +21370,13 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
}
query_shifted -= COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr = &(ref_blocks[endblocki-12]);
- alt_ptr = &(snp_blocks[endblocki-12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr = &(ref_blocks[endblocki]);
alt_ptr = &(snp_blocks[endblocki]);
ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;}
+#else
+ ref_ptr = &(ref_blocks[endblocki-12]);
+ alt_ptr = &(snp_blocks[endblocki-12]);
#endif
start = &(ref_blocks[startblocki]);
offset -= STEP_SIZE; /* 128 or 32 */
@@ -21125,10 +21394,10 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
}
query_shifted -= COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr -= 12; alt_ptr -= 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;}
+#else
+ ref_ptr -= 12; alt_ptr -= 12;
#endif
offset -= STEP_SIZE; /* 128 or 32 */
}
@@ -21146,7 +21415,8 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches_both));
return nmismatches_both;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -21267,8 +21537,8 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
endcolumni = ((left+pos3) % 128) / 32;
endblocki_32 = endblocki + endcolumni;
- debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u, plusp %d\n",
- left,pos5,pos3,startblocki,endblocki,plusp));
+ debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u, plusp %d, step_size %d\n",
+ left,pos5,pos3,startblocki,endblocki,plusp,STEP_SIZE));
nshift = left % STEP_SIZE;
query_shifted = Compress_shift(query_compress,nshift);
@@ -21291,12 +21561,13 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
- diff_32 = (block_diff_32)(query_shifted
-#ifdef HAVE_SSE2
- + startcolumni
-#endif
- ,&(ref_blocks[startblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]),
+ plusp,genestrand,query_unk_mismatch_p);
+#else
+ diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
plusp,genestrand,query_unk_mismatch_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -21322,8 +21593,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -21347,7 +21617,8 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (endblocki == startblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
plusp,genestrand,query_unk_mismatch_p);
@@ -21388,11 +21659,11 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr = &(ref_blocks[startblocki+12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr = &(ref_blocks[startblocki]);
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr = &(ref_blocks[startblocki+12]);
#endif
end = &(ref_blocks[endblocki]);
offset += STEP_SIZE; /* 128 or 32 */
@@ -21411,10 +21682,10 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr += 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr += 12;
#endif
offset += STEP_SIZE; /* 128 or 32 */
}
@@ -21437,7 +21708,8 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
debug14(if (endblocki_32 == startblocki) assert(answer == nmismatches));
return nmismatches;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -21502,12 +21774,13 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
- diff_32 = (block_diff_snp_32)(query_shifted
-#ifdef HAVE_SSE2
- + startcolumni
-#endif
- ,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
+ plusp,genestrand,query_unk_mismatch_p);
+#else
+ diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
plusp,genestrand,query_unk_mismatch_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard);
diff_32 = clear_end_32(diff_32,enddiscard);
@@ -21533,8 +21806,7 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -21558,7 +21830,8 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN)|| !defined(HAVE_SSE2)
+#else
if (endblocki == startblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
plusp,genestrand,query_unk_mismatch_p);
@@ -21599,13 +21872,13 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr = &(ref_blocks[startblocki+12]);
- alt_ptr = &(snp_blocks[startblocki+12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr = &(ref_blocks[startblocki]);
alt_ptr = &(snp_blocks[startblocki]);
ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#else
+ ref_ptr = &(ref_blocks[startblocki+12]);
+ alt_ptr = &(snp_blocks[startblocki+12]);
#endif
end = &(ref_blocks[endblocki]);
offset += STEP_SIZE; /* 128 or 32 */
@@ -21624,10 +21897,10 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
}
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr += 12; alt_ptr += 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#else
+ ref_ptr += 12; alt_ptr += 12;
#endif
offset += STEP_SIZE; /* 128 or 32 */
}
@@ -21650,7 +21923,8 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both));
return nmismatches_both;
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -21739,13 +22013,13 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
offset = (pos3 - 1) - enddiscard + 32;
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
- diff_32 = (block_diff_32)(query_shifted
-#ifdef HAVE_SSE2
- + endcolumni
-#endif
- ,&(ref_blocks[endblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[endblocki_32]),
plusp,genestrand,query_unk_mismatch_p);
+#else
+ diff_32 = (block_diff_32)(query_shifted + endcolumni,&(ref_blocks[endblocki_32]),
+ plusp,genestrand,query_unk_mismatch_p);
+#endif
diff_32 = clear_end_32(diff_32,enddiscard); /* puts 0 (matches) at end */
diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */
@@ -21779,8 +22053,7 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -21797,7 +22070,8 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (startblocki == endblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
plusp,genestrand,query_unk_mismatch_p);
@@ -21894,7 +22168,27 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff >> 16);
+ bestscore = score_high[p];
+ trimpos = offset - score_high[p+1];
+ totalscore = score_high[p+2];
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset -= 16;
+
+ p = 3*(diff & 0x0000FFFF);
+ if ((score = score_high[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset - score_high[p+1];
+ }
+ totalscore += score_high[p+2];
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset -= 16;
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,7));
bestscore = score_high[p];
trimpos = offset - score_high[p+1];
@@ -21972,39 +22266,42 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset -= 16;
-
-#else
- p = 3*(diff >> 16);
- bestscore = score_high[p];
- trimpos = offset - score_high[p+1];
- totalscore = score_high[p+2];
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset -= 16;
-
- p = 3*(diff & 0x0000FFFF);
- if ((score = score_high[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset - score_high[p+1];
- }
- totalscore += score_high[p+2];
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset -= 16;
#endif
query_shifted -= COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr = &(ref_blocks[endblocki-12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr = &(ref_blocks[endblocki]);
ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
+#else
+ ptr = &(ref_blocks[endblocki-12]);
#endif
start = &(ref_blocks[startblocki]);
while (ptr > start) {
diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff >> 16);
+ if ((score = score_high[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset - score_high[p+1];
+ }
+ totalscore += score_high[p+2];
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset -= 16;
+
+ p = 3*(diff & 0x0000FFFF);
+ if ((score = score_high[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset - score_high[p+1];
+ }
+ totalscore += score_high[p+2];
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset -= 16;
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,7));
if ((score = score_high[p] + totalscore) > bestscore) {
bestscore = score;
@@ -22084,34 +22381,13 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset -= 16;
-
-#else
- p = 3*(diff >> 16);
- if ((score = score_high[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset - score_high[p+1];
- }
- totalscore += score_high[p+2];
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset -= 16;
-
- p = 3*(diff & 0x0000FFFF);
- if ((score = score_high[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset - score_high[p+1];
- }
- totalscore += score_high[p+2];
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset -= 16;
#endif
query_shifted -= COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr -= 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
+#else
+ ptr -= 12;
#endif
}
@@ -22119,7 +22395,29 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff >> 16);
+ if ((score = score_high[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset - score_high[p+1];
+ }
+ totalscore += score_high[p+2];
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset -= 16;
+
+ p = 3*(diff & 0x0000FFFF);
+ if ((score = score_high[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset - score_high[p+1];
+ }
+ /* totalscore += score_high[p+2]; */
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ /* offset -= 16; */
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,7));
if ((score = score_high[p] + totalscore) > bestscore) {
bestscore = score;
@@ -22199,33 +22497,13 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset -= 16;
-
-#else
- p = 3*(diff >> 16);
- if ((score = score_high[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset - score_high[p+1];
- }
- totalscore += score_high[p+2];
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset -= 16;
-
- p = 3*(diff & 0x0000FFFF);
- if ((score = score_high[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset - score_high[p+1];
- }
- /* totalscore += score_high[p+2]; */
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- /* offset -= 16; */
#endif
debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos - 1));
return (trimpos - 1); /* trimpos-1 is on side of mismatch */
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -22281,13 +22559,13 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
offset = (pos3 - 1) - enddiscard + 32;
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
- diff_32 = (block_diff_snp_32)(query_shifted
-#ifdef HAVE_SSE2
- + endcolumni
-#endif
- ,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
+ plusp,genestrand,query_unk_mismatch_p);
+#else
+ diff_32 = (block_diff_snp_32)(query_shifted + endcolumni,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
plusp,genestrand,query_unk_mismatch_p);
+#endif
diff_32 = clear_end_32(diff_32,enddiscard); /* puts 0 (matches) at end */
diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */
@@ -22322,8 +22600,7 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -22339,7 +22616,8 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (startblocki == endblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
plusp,genestrand,query_unk_mismatch_p);
@@ -22437,7 +22715,27 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
plusp,genestrand,query_unk_mismatch_p);
diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff >> 16);
+ bestscore = score_high[p];
+ trimpos = offset - score_high[p+1];
+ totalscore = score_high[p+2];
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset -= 16;
+
+ p = 3*(diff & 0x0000FFFF);
+ if ((score = score_high[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset - score_high[p+1];
+ }
+ totalscore += score_high[p+2];
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset -= 16;
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,7));
bestscore = score_high[p];
trimpos = offset - score_high[p+1];
@@ -22515,41 +22813,44 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset -= 16;
-
-#else
- p = 3*(diff >> 16);
- bestscore = score_high[p];
- trimpos = offset - score_high[p+1];
- totalscore = score_high[p+2];
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset -= 16;
-
- p = 3*(diff & 0x0000FFFF);
- if ((score = score_high[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset - score_high[p+1];
- }
- totalscore += score_high[p+2];
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset -= 16;
#endif
query_shifted -= COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr = &(ref_blocks[endblocki-12]);
- alt_ptr = &(snp_blocks[endblocki-12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr = &(ref_blocks[endblocki]);
alt_ptr = &(snp_blocks[endblocki]);
ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;}
+#else
+ ref_ptr = &(ref_blocks[endblocki-12]);
+ alt_ptr = &(snp_blocks[endblocki-12]);
#endif
start = &(ref_blocks[startblocki]);
while (ref_ptr > start) {
diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff >> 16);
+ if ((score = score_high[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset - score_high[p+1];
+ }
+ totalscore += score_high[p+2];
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset -= 16;
+
+ p = 3*(diff & 0x0000FFFF);
+ if ((score = score_high[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset - score_high[p+1];
+ }
+ totalscore += score_high[p+2];
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset -= 16;
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,7));
if ((score = score_high[p] + totalscore) > bestscore) {
bestscore = score;
@@ -22629,34 +22930,13 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset -= 16;
-
-#else
- p = 3*(diff >> 16);
- if ((score = score_high[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset - score_high[p+1];
- }
- totalscore += score_high[p+2];
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset -= 16;
-
- p = 3*(diff & 0x0000FFFF);
- if ((score = score_high[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset - score_high[p+1];
- }
- totalscore += score_high[p+2];
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset -= 16;
#endif
query_shifted -= COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr -= 12; alt_ptr -= 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;}
+#else
+ ref_ptr -= 12; alt_ptr -= 12;
#endif
}
@@ -22665,7 +22945,29 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff >> 16);
+ if ((score = score_high[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset - score_high[p+1];
+ }
+ totalscore += score_high[p+2];
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset -= 16;
+
+ p = 3*(diff & 0x0000FFFF);
+ if ((score = score_high[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset - score_high[p+1];
+ }
+ /* totalscore += score_high[p+2]; */
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ /* offset -= 16; */
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,7));
if ((score = score_high[p] + totalscore) > bestscore) {
bestscore = score;
@@ -22745,33 +23047,13 @@ trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5,
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset -= 16;
-
-#else
- p = 3*(diff >> 16);
- if ((score = score_high[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset - score_high[p+1];
- }
- totalscore += score_high[p+2];
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset -= 16;
-
- p = 3*(diff & 0x0000FFFF);
- if ((score = score_high[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset - score_high[p+1];
- }
- /* totalscore += score_high[p+2]; */
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- /* offset -= 16; */
#endif
debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos - 1));
return (trimpos - 1); /* trimpos-1 is on side of mismatch */
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -22828,14 +23110,13 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
offset = -startdiscard + pos5;
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
- diff_32 = (block_diff_32)(query_shifted
-#ifdef HAVE_SSE2
- + startcolumni
-#endif
- ,&(ref_blocks[startblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]),
plusp,genestrand,query_unk_mismatch_p);
-
+#else
+ diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
+ plusp,genestrand,query_unk_mismatch_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard); /* puts 0 (matches) at start */
diff_32 = set_end_32(diff_32,enddiscard); /* puts 1 (mismatches) at end */
@@ -22869,8 +23150,7 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -22886,7 +23166,8 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (endblocki == startblocki) {
diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
plusp,genestrand,query_unk_mismatch_p);
@@ -22984,7 +23265,27 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */
debug(printf("clearing start %08X\n",clear_start_mask(startdiscard)));
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff & 0x0000FFFF);
+ bestscore = score_low[p];
+ trimpos = offset + score_low[p+1];
+ totalscore = score_low[p+2];
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset += 16;
+
+ p = 3*(diff >> 16);
+ if ((score = score_low[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset + score_low[p+1];
+ }
+ totalscore += score_low[p+2];
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset += 16;
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,0));
bestscore = score_low[p];
trimpos = offset + score_low[p+1];
@@ -23050,51 +23351,54 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
}
totalscore += score_low[p+2];
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset += 16;
-
- p = 3*((unsigned short) _mm_extract_epi16(diff,7));
- if ((score = score_low[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset + score_low[p+1];
- }
- totalscore += score_low[p+2];
- debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset += 16;
-#else
- p = 3*(diff & 0x0000FFFF);
- bestscore = score_low[p];
- trimpos = offset + score_low[p+1];
- totalscore = score_low[p+2];
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset += 16;
-
- p = 3*(diff >> 16);
+ p = 3*((unsigned short) _mm_extract_epi16(diff,7));
if ((score = score_low[p] + totalscore) > bestscore) {
bestscore = score;
trimpos = offset + score_low[p+1];
}
totalscore += score_low[p+2];
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset += 16;
#endif
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr = &(ref_blocks[startblocki+12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr = &(ref_blocks[startblocki]);
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr = &(ref_blocks[startblocki+12]);
#endif
end = &(ref_blocks[endblocki]);
while (ptr < end) {
diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff & 0x0000FFFF);
+ if ((score = score_low[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset + score_low[p+1];
+ }
+ totalscore += score_low[p+2];
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset += 16;
+
+ p = 3*(diff >> 16);
+ if ((score = score_low[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset + score_low[p+1];
+ }
+ totalscore += score_low[p+2];
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset += 16;
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,0));
if ((score = score_low[p] + totalscore) > bestscore) {
bestscore = score;
@@ -23174,34 +23478,13 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset += 16;
-
-#else
- p = 3*(diff & 0x0000FFFF);
- if ((score = score_low[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset + score_low[p+1];
- }
- totalscore += score_low[p+2];
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset += 16;
-
- p = 3*(diff >> 16);
- if ((score = score_low[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset + score_low[p+1];
- }
- totalscore += score_low[p+2];
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset += 16;
#endif
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ptr += 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#else
+ ptr += 12;
#endif
}
@@ -23209,7 +23492,29 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff & 0x0000FFFF);
+ if ((score = score_low[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset + score_low[p+1];
+ }
+ totalscore += score_low[p+2];
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset += 16;
+
+ p = 3*(diff >> 16);
+ if ((score = score_low[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset + score_low[p+1];
+ }
+ /* totalscore += score_low[p+2]; */
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ /* offset += 16; */
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,0));
if ((score = score_low[p] + totalscore) > bestscore) {
bestscore = score;
@@ -23289,33 +23594,13 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset += 16;
-
-#else
- p = 3*(diff & 0x0000FFFF);
- if ((score = score_low[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset + score_low[p+1];
- }
- totalscore += score_low[p+2];
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset += 16;
-
- p = 3*(diff >> 16);
- if ((score = score_low[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset + score_low[p+1];
- }
- /* totalscore += score_low[p+2]; */
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- /* offset += 16; */
#endif
debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos + 1));
return (trimpos + 1); /* trimpos+1 is on side of mismatch */
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
@@ -23372,14 +23657,13 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
offset = -startdiscard + pos5;
debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
- diff_32 = (block_diff_snp_32)(query_shifted
-#ifdef HAVE_SSE2
- + startcolumni
-#endif
- ,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
plusp,genestrand,query_unk_mismatch_p);
-
+#else
+ diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
+ plusp,genestrand,query_unk_mismatch_p);
+#endif
diff_32 = clear_start_32(diff_32,startdiscard); /* puts 0 (matches) at start */
diff_32 = set_end_32(diff_32,enddiscard); /* puts 1 (mismatches) at end */
@@ -23413,8 +23697,7 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
else {
#endif
-#ifdef HAVE_SSE2
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
startblocki = startblocki_32;
endblocki = endblocki_32;
#endif
@@ -23429,7 +23712,8 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
#endif
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
if (endblocki == startblocki) {
diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
plusp,genestrand,query_unk_mismatch_p);
@@ -23527,7 +23811,27 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff & 0x0000FFFF);
+ bestscore = score_low[p];
+ trimpos = offset + score_low[p+1];
+ totalscore = score_low[p+2];
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset += 16;
+
+ p = 3*(diff >> 16);
+ if ((score = score_low[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset + score_low[p+1];
+ }
+ totalscore += score_low[p+2];
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset += 16;
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,0));
bestscore = score_low[p];
trimpos = offset + score_low[p+1];
@@ -23605,41 +23909,44 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset += 16;
-
-#else
- p = 3*(diff & 0x0000FFFF);
- bestscore = score_low[p];
- trimpos = offset + score_low[p+1];
- totalscore = score_low[p+2];
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset += 16;
-
- p = 3*(diff >> 16);
- if ((score = score_low[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset + score_low[p+1];
- }
- totalscore += score_low[p+2];
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset += 16;
#endif
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr = &(ref_blocks[startblocki+12]);
- alt_ptr = &(snp_blocks[startblocki+12]);
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr = &(ref_blocks[startblocki]);
alt_ptr = &(snp_blocks[startblocki]);
ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#else
+ ref_ptr = &(ref_blocks[startblocki+12]);
+ alt_ptr = &(snp_blocks[startblocki+12]);
#endif
end = &(ref_blocks[endblocki]);
while (ref_ptr < end) {
diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff & 0x0000FFFF);
+ if ((score = score_low[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset + score_low[p+1];
+ }
+ totalscore += score_low[p+2];
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset += 16;
+
+ p = 3*(diff >> 16);
+ if ((score = score_low[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset + score_low[p+1];
+ }
+ totalscore += score_low[p+2];
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset += 16;
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,0));
if ((score = score_low[p] + totalscore) > bestscore) {
bestscore = score;
@@ -23719,34 +24026,13 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset += 16;
-
-#else
- p = 3*(diff & 0x0000FFFF);
- if ((score = score_low[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset + score_low[p+1];
- }
- totalscore += score_low[p+2];
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset += 16;
-
- p = 3*(diff >> 16);
- if ((score = score_low[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset + score_low[p+1];
- }
- totalscore += score_low[p+2];
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset += 16;
#endif
query_shifted += COMPRESS_BLOCKSIZE;
-#ifdef HAVE_SSE2
- ref_ptr += 12; alt_ptr += 12;
-#else
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#else
+ ref_ptr += 12; alt_ptr += 12;
#endif
}
@@ -23755,7 +24041,29 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */
-#ifdef HAVE_SSE2
+
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+ p = 3*(diff & 0x0000FFFF);
+ if ((score = score_low[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset + score_low[p+1];
+ }
+ totalscore += score_low[p+2];
+ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ offset += 16;
+
+ p = 3*(diff >> 16);
+ if ((score = score_low[p] + totalscore) > bestscore) {
+ bestscore = score;
+ trimpos = offset + score_low[p+1];
+ }
+ /* totalscore += score_low[p+2]; */
+ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+ diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+ /* offset += 16; */
+
+#else
p = 3*((unsigned short) _mm_extract_epi16(diff,0));
if ((score = score_low[p] + totalscore) > bestscore) {
bestscore = score;
@@ -23835,33 +24143,13 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
offset += 16;
-
-#else
- p = 3*(diff & 0x0000FFFF);
- if ((score = score_low[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset + score_low[p+1];
- }
- totalscore += score_low[p+2];
- debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- offset += 16;
-
- p = 3*(diff >> 16);
- if ((score = score_low[p] + totalscore) > bestscore) {
- bestscore = score;
- trimpos = offset + score_low[p+1];
- }
- /* totalscore += score_low[p+2]; */
- debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
- diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
- /* offset += 16; */
#endif
debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos + 1));
return (trimpos + 1); /* trimpos+1 is on side of mismatch */
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
}
#endif
}
diff --git a/src/get-genome.c b/src/get-genome.c
index e2e6e7a..10c3288 100644
--- a/src/get-genome.c
+++ b/src/get-genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: get-genome.c 161940 2015-03-25 20:36:59Z twu $";
+static char rcsid[] = "$Id: get-genome.c 170023 2015-07-17 16:47:21Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -72,6 +72,8 @@ static bool vareffect_p = false;
/* Dump options */
static bool dumpallp = false;
+static bool stream_chars_p = false;
+static bool stream_ints_p = false;
static bool dumpchrp = false;
static bool dumpchr_forsam_p = false;
static bool dumpsegsp = false;
@@ -105,6 +107,7 @@ static struct option long_options[] = {
/* Dump options */
{"dump", no_argument, 0, 'A'}, /* dumpallp */
+ {"stream-chars", no_argument, 0, 0}, {"stream-ints", no_argument, 0, 0}, /* stream_chars_p, stream_ints_p */
{"chromosomes", no_argument, 0, 'L'}, /* dumpchrp */
{"forsam", no_argument, 0, 0}, /* dumpchr_forsam_p */
{"contigs", no_argument, 0, 'I'}, /* dumpsegsp */
@@ -176,6 +179,7 @@ External map file options\n\
\n\
Dump options\n\
-A, --dump Dump entire genome in FASTA format\n\
+ --stream-chars Dump entire genome as a single stream of ACGTX bytes\n\
-L, --chromosomes List all chromosomes with universal coordinates\n\
--forsam List all chromosomes for use in a SAM file\n\
-I, --contigs List all contigs with universal coordinates\n\
@@ -393,7 +397,9 @@ print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, C
Chrpos_T chrpos;
/* Handle reference strain */
- if (vareffect_p == true) {
+ if (stream_chars_p == true || stream_ints_p == true) {
+ /* Don't print a header */
+ } else if (vareffect_p == true) {
/* Don't print a header */
} else if (user_typestring != NULL) {
/* Don't print a header */
@@ -475,6 +481,16 @@ print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, C
Sequence_free(&genomicseg);
FREE(chromosome1);
+ } else if (stream_chars_p == true) {
+ genomicseg = Genome_get_segment(genome,genomicstart,genomiclength,chromosome_iit,revcomp);
+ Sequence_stdout_stream_chars(genomicseg);
+ Sequence_free(&genomicseg);
+
+ } else if (stream_ints_p == true) {
+ genomicseg = Genome_get_segment(genome,genomicstart,genomiclength,chromosome_iit,revcomp);
+ Sequence_stdout_stream_ints(genomicseg);
+ Sequence_free(&genomicseg);
+
} else if (snps_root == NULL || print_snps_mode == 0 || print_snps_mode == 2) {
genomicseg = Genome_get_segment(genome,genomicstart,genomiclength,chromosome_iit,revcomp);
if (user_typestring == NULL) {
@@ -1047,7 +1063,6 @@ int
main (int argc, char *argv[]) {
char *snpsdir = NULL;
char *iitfile;
- FILE *fp;
Genome_T genome = NULL, genomealt = NULL;
Univcoord_T genomicstart, chroffset;
Chrpos_T genomiclength, chrlength, chrstart, chrend;
@@ -1063,6 +1078,10 @@ main (int argc, char *argv[]) {
int *matches, nmatches, ndivs, i, *leftflanks, *rightflanks, nleftflanks = 0, nrightflanks = 0;
int sign;
+ int circular_typeint;
+ bool *circularp = NULL;
+ bool any_circular_p;
+
char *chr, *with_colon;
int indx;
bool allocp;
@@ -1098,6 +1117,12 @@ main (int argc, char *argv[]) {
} else if (!strcmp(long_name,"vareffect")) {
vareffect_p = true;
+ } else if (!strcmp(long_name,"stream-chars")) {
+ stream_chars_p = true;
+
+ } else if (!strcmp(long_name,"stream-ints")) {
+ stream_ints_p = true;
+
} else {
/* Shouldn't reach here */
fprintf(stderr,"Don't recognize option %s. For usage, run 'get-genome --help'",long_name);
@@ -1157,7 +1182,47 @@ main (int argc, char *argv[]) {
snpsdir = user_snpsdir;
}
- if (dumpallp == true) {
+ if (stream_chars_p == true || stream_ints_p == true) {
+ iitfile = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+
+ strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
+ sprintf(iitfile,"%s/%s.chromosome.iit",genomesubdir,fileroot);
+ chromosome_iit = Univ_IIT_read(iitfile,/*readonlyp*/true,/*add_iit_p*/false);
+ FREE(iitfile);
+
+ circular_typeint = Univ_IIT_typeint(chromosome_iit,"circular");
+ circularp = Univ_IIT_circularp(&any_circular_p,chromosome_iit);
+
+ genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
+ uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
+
+ for (indx = 1; indx <= Univ_IIT_total_nintervals(chromosome_iit); indx++) {
+ chr = Univ_IIT_label(chromosome_iit,indx,&allocp);
+ with_colon = (char *) CALLOC(strlen(chr)+strlen(":")+1,sizeof(char));
+ sprintf(with_colon,"%s:",chr);
+ if (allocp == true) {
+ FREE(chr);
+ }
+ if (Parserange_universal(&segment,&revcomp,&genomicstart,&genomiclength,&chrstart,&chrend,
+ &chroffset,&chrlength,with_colon,genomesubdir,fileroot) == true) {
+ print_sequence(genome,/*genomealt*/NULL,genomicstart,genomiclength,chromosome_iit,
+ /*whole_chromosome_p*/true);
+ if (circularp[indx] == true) {
+ /* Print again, since internal genome represents circular chromosomes twice */
+ print_sequence(genome,/*genomealt*/NULL,genomicstart,genomiclength,chromosome_iit,
+ /*whole_chromosome_p*/true);
+ }
+ }
+ FREE(with_colon);
+ }
+
+ Genome_free(&genome);
+
+ Univ_IIT_free(&chromosome_iit);
+
+ return 0;
+
+
+ } else if (dumpallp == true) {
iitfile = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+
strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
sprintf(iitfile,"%s/%s.chromosome.iit",genomesubdir,fileroot);
diff --git a/src/gmap.c b/src/gmap.c
index 60f8f29..74a9046 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 166641 2015-05-29 21:13:04Z twu $";
+static char rcsid[] = "$Id: gmap.c 168166 2015-06-24 03:57:10Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -5150,8 +5150,12 @@ parse_command_line (int argc, char *argv[], int optind) {
mode = ATOI_STRANDED;
} else if (!strcmp(optarg,"atoi-nonstranded")) {
mode = ATOI_NONSTRANDED;
+ } else if (!strcmp(optarg,"ttoc-stranded")) {
+ mode = TTOC_STRANDED;
+ } else if (!strcmp(optarg,"ttoc-nonstranded")) {
+ mode = TTOC_NONSTRANDED;
} else {
- fprintf(stderr,"--mode must be standard, cmet-stranded, cmet-nonstranded, atoi-stranded, or atoi\n");
+ fprintf(stderr,"--mode must be standard, cmet-stranded, cmet-nonstranded, atoi-stranded, atoi-nonstranded, ttoc-stranded, or ttoc-nonstranded\n");
return 9;
}
@@ -5831,6 +5835,7 @@ main (int argc, char *argv[]) {
if (nread > 1) {
multiple_sequences_p = true;
+#if 0
#ifdef HAVE_MMAP
if (offsetsstrm_access != USE_ALLOCATE || genome_access != USE_ALLOCATE) {
fprintf(stderr,"Note: >1 sequence detected, so index files are being memory mapped.\n");
@@ -5841,6 +5846,7 @@ main (int argc, char *argv[]) {
fprintf(stderr," For more speed, also try multiple threads (-t <int>), if you have multiple processors or cores.");
#endif
fprintf(stderr,"\n");
+#endif
}
#endif
@@ -6126,6 +6132,31 @@ main (int argc, char *argv[]) {
exit(9);
}
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ if (user_atoidir == NULL) {
+ modedir = genomesubdir;
+ } else {
+ modedir = user_atoidir;
+ }
+
+ if ((indexdb_fwd = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
+ required_index1part,required_index1interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
+ fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
+ exit(9);
+ }
+
+ if ((indexdb_rev = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
+ required_index1part,required_index1interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
+ fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
+ exit(9);
+ }
+
} else {
/* Standard behavior */
if ((indexdb_fwd = Indexdb_new_genome(&index1part,&index1interval,
@@ -6234,6 +6265,30 @@ main (int argc, char *argv[]) {
exit(9);
}
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ if (user_atoidir == NULL) {
+ modedir = snpsdir;
+ } else {
+ modedir = user_atoidir;
+ }
+
+ if ((indexdb_fwd = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
+ required_index1part,required_index1interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
+ fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
+ exit(9);
+ }
+ if ((indexdb_rev = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
+ required_index1part,required_index1interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ sharedp)) == NULL) {
+ fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
+ exit(9);
+ }
+
} else {
indexdb_fwd = Indexdb_new_genome(&index1part,&index1interval,
snpsdir,fileroot,/*idx_filesuffix*/"ref",snps_root,
@@ -6840,8 +6895,9 @@ Usage: gmap [OPTIONS...] <FASTA files...>, or\n\
--atoidir=STRING Directory for A-to-I RNA editing index files (created using atoiindex)\n\
(default is location of genome index files specified using -D, -V, and -d)\n\
--mode=STRING Alignment mode: standard (default), cmet-stranded, cmet-nonstranded,\n\
- atoi-stranded, or atoi-nonstranded. Non-standard modes requires you\n\
- to have previously run the cmetindex or atoiindex programs on the genome\n\
+ atoi-stranded, atoi-nonstranded, ttoc-stranded, or ttoc-nonstranded.\n\
+ Non-standard modes requires you to have previously run the cmetindex\n\
+ or atoiindex programs (which also cover the ttoc modes) on the genome\n\
");
#endif
diff --git a/src/gsnap.c b/src/gsnap.c
index f6931d6..faf7281 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 166787 2015-06-02 18:00:56Z twu $";
+static char rcsid[] = "$Id: gsnap.c 168165 2015-06-24 03:56:57Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -279,6 +279,7 @@ static int exclude_ranks[1];
static MPI_Comm workers_comm;
static MPI_Group world_group, workers_group;
static int nthreads0;
+static bool master_is_worker_p = false; /* default behavior */
#endif
#ifdef HAVE_PTHREAD
@@ -779,6 +780,7 @@ check_compiler_assumptions () {
fprintf(stderr,"\n");
#ifdef HAVE_SSE2
+ /* With -mavx, compiler may use assembly instructions for _mm_set1_epi32 that don't work on non-AVX machines */
fprintf(stderr,"Checking compiler assumptions for SSE2: ");
fprintf(stderr,"%08X %08X",x,y);
a = _mm_xor_si128(_mm_set1_epi32(x),_mm_set1_epi32(y));
@@ -1680,8 +1682,12 @@ parse_command_line (int argc, char *argv[], int optind) {
mode = ATOI_STRANDED;
} else if (!strcmp(optarg,"atoi-nonstranded")) {
mode = ATOI_NONSTRANDED;
+ } else if (!strcmp(optarg,"ttoc-stranded")) {
+ mode = TTOC_STRANDED;
+ } else if (!strcmp(optarg,"ttoc-nonstranded")) {
+ mode = TTOC_NONSTRANDED;
} else {
- fprintf(stderr,"--mode must be standard, cmet-stranded, cmet-nonstranded, atoi-stranded, or atoi-nonstranded\n");
+ fprintf(stderr,"--mode must be standard, cmet-stranded, cmet-nonstranded, atoi-stranded, atoi-nonstranded, ttoc-stranded, or ttoc-nonstranded\n");
return 9;
}
@@ -1974,6 +1980,18 @@ parse_command_line (int argc, char *argv[], int optind) {
} else if (!strcmp(long_name,"read-group-platform")) {
sam_read_group_platform = optarg;
+#ifdef USE_MPI
+ } else if (!strcmp(long_name,"master-is-worker")) {
+ if (!strcmp(optarg,"1")) {
+ master_is_worker_p = true;
+ } else if (!strcmp(optarg,"0")) {
+ master_is_worker_p = false; /* Default */
+ } else {
+ fprintf(stderr,"--master-is-worker flag must be 0 or 1\n");
+ return 9;
+ }
+#endif
+
} else if (!strcmp(long_name,"print-snps")) {
print_snplabels_p = true;
@@ -2708,6 +2726,29 @@ worker_setup (char *genomesubdir, char *fileroot) {
exit(9);
}
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ if (user_atoidir == NULL) {
+ modedir = genomesubdir;
+ } else {
+ modedir = user_atoidir;
+ }
+
+ if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
+ required_index1part,required_index1interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
+ fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
+ exit(9);
+ }
+
+ if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
+ required_index1part,required_index1interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
+ fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
+ exit(9);
+ }
+
} else {
/* Standard behavior */
@@ -2810,6 +2851,28 @@ worker_setup (char *genomesubdir, char *fileroot) {
exit(9);
}
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ if (user_atoidir == NULL) {
+ modedir = snpsdir;
+ } else {
+ modedir = user_atoidir;
+ }
+
+ if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
+ required_index1part,required_index1interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
+ fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
+ exit(9);
+ }
+ if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
+ required_index1part,required_index1interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,sharedp)) == NULL) {
+ fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
+ exit(9);
+ }
+
} else {
indexdb = Indexdb_new_genome(&index1part,&index1interval,
snpsdir,fileroot,/*idx_filesuffix*/"ref",snps_root,
@@ -3307,7 +3370,6 @@ main (int argc, char *argv[]) {
#ifdef USE_MPI
Master_T master;
- bool master_is_worker_p;
char **files_master;
int nfiles_master;
FILE *input_parser, *input2_parser;
@@ -3403,8 +3465,18 @@ main (int argc, char *argv[]) {
MPI_Comm_size(MPI_COMM_WORLD,&nranks);
MPI_Debug_setup(myid);
- if ((nthreads0 = nthreads - 1) <= 0) {
- /* Exclude master rank 0 from workers_group */
+ nthreads0 = nthreads - 1;
+ if (master_is_worker_p == false) {
+ /* Default is to exclude master node from working */
+ exclude_ranks[0] = 0;
+ MPI_Comm_group(MPI_COMM_WORLD,&world_group);
+ MPI_Group_excl(world_group,1,exclude_ranks,&workers_group);
+ MPI_Comm_create(MPI_COMM_WORLD,workers_group,&workers_comm);
+ MPI_Group_free(&workers_group);
+ MPI_Group_free(&world_group);
+
+ } else if (nthreads0 <= 0) {
+ /* If insufficient threads, then also exclude master node from working */
exclude_ranks[0] = 0;
MPI_Comm_group(MPI_COMM_WORLD,&world_group);
MPI_Group_excl(world_group,1,exclude_ranks,&workers_group);
@@ -3418,7 +3490,7 @@ main (int argc, char *argv[]) {
MPI_Comm_group(MPI_COMM_WORLD,&world_group);
MPI_Comm_create(MPI_COMM_WORLD,world_group,&workers_comm);
MPI_Group_free(&world_group);
- master_is_worker_p = true;
+ /* master_is_worker_p = true; */
}
n_slave_ranks = nranks - 1; /* Don't include master, even if it's a worker */
@@ -3526,6 +3598,7 @@ main (int argc, char *argv[]) {
#endif
if (multiple_sequences_p == true) {
+#if 0
if (offsetsstrm_access != USE_ALLOCATE || genome_access != USE_ALLOCATE ||
sarray_access != USE_ALLOCATE || lcp_access != USE_ALLOCATE) {
fprintf(stderr,"Note: >1 sequence detected, so index files are being memory mapped.\n");
@@ -3537,6 +3610,7 @@ main (int argc, char *argv[]) {
#endif
fprintf(stderr,"\n");
}
+#endif
} else {
/* fprintf(stderr,"Note: only 1 sequence detected. Ignoring batch (-B) command\n"); */
@@ -4010,8 +4084,9 @@ is still designed to be fast.\n\
--atoidir=STRING Directory for A-to-I RNA editing index files (created using atoiindex)\n\
(default is location of genome index files specified using -D, -V, and -d)\n\
--mode=STRING Alignment mode: standard (default), cmet-stranded, cmet-nonstranded,\n\
- atoi-stranded, or atoi-nonstranded. Non-standard modes requires you\n\
- to have previously run the cmetindex or atoiindex programs on the genome\n\
+ atoi-stranded, atoi-nonstranded, ttoc-stranded, or ttoc-nonstranded.\n\
+ Non-standard modes requires you to have previously run the cmetindex\n\
+ or atoiindex programs (which also cover the ttoc modes) on the genome\n\
");
@@ -4277,6 +4352,17 @@ is still designed to be fast.\n\
");
fprintf(stdout,"\n");
+#ifdef USE_MPI
+ fprintf(stdout,"Options for MPI\n");
+ fprintf(stdout,"\
+ --master-is-worker=INT Determines whether master node allocates threads for performing computation\n\
+ in addition to coordinating input and output. Number of worker threads\n\
+ will be --nthreads minus 2\n\
+ Values: 0 (no, default), 1 (yes if enough worker threads available)\n\
+");
+ fprintf(stdout,"\n");
+#endif
+
/* Help options */
fprintf(stdout,"Help options\n");
fprintf(stdout,"\
diff --git a/src/iit-read-univ.c b/src/iit-read-univ.c
index 3cef0d9..f76e46c 100644
--- a/src/iit-read-univ.c
+++ b/src/iit-read-univ.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-read-univ.c 161940 2015-03-25 20:36:59Z twu $";
+static char rcsid[] = "$Id: iit-read-univ.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -95,7 +95,7 @@ static char rcsid[] = "$Id: iit-read-univ.c 161940 2015-03-25 20:36:59Z twu $";
available). */
typedef struct Univ_FNode_T *Univ_FNode_T;
struct Univ_FNode_T {
- Univcoord_T value;
+ Univ_IIT_coord_T value;
int a;
int b;
int leftindex;
@@ -986,6 +986,7 @@ read_tree_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T new) {
new->nodes = (struct Univ_FNode_T *) CALLOC(new->nnodes,sizeof(struct Univ_FNode_T));
#ifdef WORDS_BIGENDIAN
if (new->coord_values_8p == true) {
+#ifdef HAVE_64_BIT
for (i = 0; i < new->nnodes; i++) {
Bigendian_fread_uint8(&(new->nodes[i].value),fp);
Bigendian_fread_int(&(new->nodes[i].a),fp);
@@ -994,6 +995,10 @@ read_tree_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T new) {
Bigendian_fread_int(&(new->nodes[i].rightindex),fp);
}
offset += (sizeof(UINT8)+sizeof(int)+sizeof(int)+sizeof(int)+sizeof(int))*new->nnodes;
+#else
+ fprintf(stderr,"IIT file contains 64-bit coordinates, but this computer is only 32-bit. Cannot continue.\n");
+ exit(9);
+#endif
} else {
for (i = 0; i < new->nnodes; i++) {
Bigendian_fread_uint(&uint4,fp);
@@ -1007,6 +1012,7 @@ read_tree_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T new) {
}
#else
if (new->coord_values_8p == true) {
+#ifdef HAVE_64_BIT
#if 1
offset += sizeof(struct Univ_FNode_T)*fread(new->nodes,sizeof(struct Univ_FNode_T),new->nnodes,fp);
#else
@@ -1020,6 +1026,10 @@ read_tree_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T new) {
}
offset += (sizeof(UINT8)+sizeof(int)+sizeof(int)+sizeof(int)+sizeof(int))*new->nnodes;
#endif
+#else
+ fprintf(stderr,"IIT file contains 64-bit coordinates, but this computer is only 32-bit. Cannot continue.\n");
+ exit(9);
+#endif
} else {
for (i = 0; i < new->nnodes; i++) {
FREAD_UINT(&uint4,fp);
@@ -1051,16 +1061,21 @@ read_intervals_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T n
#ifdef WORDS_BIGENDIAN
if (new->coord_values_8p == true) {
+#ifdef HAVE_64_BIT
for (i = 0; i < new->total_nintervals; i++) {
Bigendian_fread_uint8(&(new->intervals[i].low),fp);
Bigendian_fread_uint8(&(new->intervals[i].high),fp);
Bigendian_fread_int(&(new->intervals[i].type),fp);
}
+#else
+ fprintf(stderr,"IIT file contains 64-bit coordinates, but this computer is only 32-bit. Cannot continue.\n");
+ exit(9);
+#endif
} else {
for (i = 0; i < new->total_nintervals; i++) {
- Bigendian_fread_uint(&unit4,fp);
+ Bigendian_fread_uint(&uint4,fp);
new->intervals[i].low = (Univcoord_T) uint4;
- Bigendian_fread_uint(&unit4,fp);
+ Bigendian_fread_uint(&uint4,fp);
new->intervals[i].high = (Univcoord_T) uint4;
Bigendian_fread_int(&(new->intervals[i].type),fp);
}
@@ -1068,12 +1083,17 @@ read_intervals_univ (off_t offset, off_t filesize, FILE *fp, char *filename, T n
}
#else
if (new->coord_values_8p == true) {
+#ifdef HAVE_64_BIT
for (i = 0; i < new->total_nintervals; i++) {
FREAD_UINT8(&(new->intervals[i].low),fp);
FREAD_UINT8(&(new->intervals[i].high),fp);
FREAD_INT(&(new->intervals[i].type),fp);
}
offset += (sizeof(UINT8)+sizeof(UINT8)+sizeof(int))*new->total_nintervals;
+#else
+ fprintf(stderr,"IIT file contains 64-bit coordinates, but this computer is only 32-bit. Cannot continue.\n");
+ exit(9);
+#endif
} else {
for (i = 0; i < new->total_nintervals; i++) {
FREAD_UINT(&uint4,fp);
diff --git a/src/indexdb.c b/src/indexdb.c
index fa9232b..ec4b674 100644
--- a/src/indexdb.c
+++ b/src/indexdb.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb.c 161940 2015-03-25 20:36:59Z twu $";
+static char rcsid[] = "$Id: indexdb.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -254,17 +254,14 @@ Indexdb_mean_size (T this, Mode_T mode, Width_T index1part) {
n = oligospace = power(this->alphabet_size,index1part);
#else
n = oligospace = power(4,index1part);
- if (mode == CMET_STRANDED || mode == CMET_NONSTRANDED || mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
+ if (mode != STANDARD) {
n = power(3,index1part);
}
#endif
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
- return (double) this->offsetsstrm[this->offsetsmeta[oligospace/this->blocksize]]/(double) n;
- } else {
- return (double) Bigendian_convert_uint(this->offsetsstrm[this->offsetsmeta[oligospace/this->blocksize]])/(double) n;
- }
+ /* Also holds for ALLOCATED_PRIVATE and ALLOCATED_SHARED */
+ return (double) Bigendian_convert_uint(this->offsetsstrm[Bigendian_convert_uint(this->offsetsmeta[oligospace/this->blocksize])])/(double) n;
#else
return (double) this->offsetsstrm[this->offsetsmeta[oligospace/this->blocksize]]/(double) n;
#endif
@@ -2013,13 +2010,9 @@ Indexdb_read (int *nentries, T this, Storedoligomer_T aaindex) {
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
- ptr0 = this->offsetsstrm[aaindex];
- end0 = this->offsetsstrm[aaindex+1];
- } else {
- ptr0 = Bigendian_convert_uint(this->offsetsstrm[aaindex]);
- end0 = Bigendian_convert_uint(this->offsetsstrm[aaindex+1]);
- }
+ /* Also holds for ALLOCATED_PRIVATE and ALLOCATED_SHARED */
+ ptr0 = Bigendian_convert_uint(this->offsetsstrm[aaindex]);
+ end0 = Bigendian_convert_uint(this->offsetsstrm[aaindex+1]);
#else
ptr0 = this->offsetsstrm[aaindex];
end0 = this->offsetsstrm[aaindex+1];
@@ -2168,13 +2161,9 @@ Indexdb_read (int *nentries, T this, Storedoligomer_T oligo) {
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
- ptr0 = this->offsetsstrm[part0];
- end0 = this->offsetsstrm[part0+1];
- } else {
- ptr0 = Bigendian_convert_uint(this->offsetsstrm[part0]);
- end0 = Bigendian_convert_uint(this->offsetsstrm[part0+1]);
- }
+ /* Also holds for ALLOCATED_PRIVATE and ALLOCATED_SHARED */
+ ptr0 = Bigendian_convert_uint(this->offsetsstrm[part0]);
+ end0 = Bigendian_convert_uint(this->offsetsstrm[part0+1]);
#else
ptr0 = this->offsetsstrm[part0];
end0 = this->offsetsstrm[part0+1];
@@ -2321,13 +2310,9 @@ Indexdb_read_inplace (int *nentries,
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
- ptr0 = this->offsetsstrm[part0];
- end0 = this->offsetsstrm[part0+1];
- } else {
- ptr0 = Bigendian_convert_uint(this->offsetsstrm[part0]);
- end0 = Bigendian_convert_uint(this->offsetsstrm[part0+1]);
- }
+ /* Also holds for ALLOCATED_PRIVATE and ALLOCATED_SHARED */
+ ptr0 = Bigendian_convert_uint(this->offsetsstrm[part0]);
+ end0 = Bigendian_convert_uint(this->offsetsstrm[part0+1]);
#else
ptr0 = this->offsetsstrm[part0];
end0 = this->offsetsstrm[part0+1];
@@ -2394,13 +2379,9 @@ Indexdb_read_with_diagterm (int *nentries, T this, Storedoligomer_T oligo, int d
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
- ptr0 = this->offsetsstrm[oligo];
- end0 = this->offsetsstrm[oligo+1];
- } else {
- ptr0 = Bigendian_convert_uint(this->offsetsstrm[oligo]);
- end0 = Bigendian_convert_uint(this->offsetsstrm[oligo+1]);
- }
+ /* Also holds for ALLOCATED_PRIVATE and ALLOCATED_SHARED */
+ ptr0 = Bigendian_convert_uint(this->offsetsstrm[oligo]);
+ end0 = Bigendian_convert_uint(this->offsetsstrm[oligo+1]);
#else
ptr0 = this->offsetsstrm[oligo];
end0 = this->offsetsstrm[oligo+1];
@@ -2498,13 +2479,9 @@ Indexdb_read_with_diagterm_sizelimit (int *nentries, T this, Storedoligomer_T ol
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
- if (this->offsetsstrm_access == ALLOCATED_PRIVATE || this->offsetsstrm_access == ALLOCATED_SHARED) {
- ptr0 = this->offsetsstrm[oligo];
- end0 = this->offsetsstrm[oligo+1];
- } else {
- ptr0 = Bigendian_convert_uint(this->offsetsstrm[oligo]);
- end0 = Bigendian_convert_uint(this->offsetsstrm[oligo+1]);
- }
+ /* Also holds for ALLOCATED_PRIVATE and ALLOCATED_SHARED */
+ ptr0 = Bigendian_convert_uint(this->offsetsstrm[oligo]);
+ end0 = Bigendian_convert_uint(this->offsetsstrm[oligo+1]);
#else
ptr0 = this->offsetsstrm[oligo];
end0 = this->offsetsstrm[oligo+1];
diff --git a/src/indexdb_hr.c b/src/indexdb_hr.c
index 1731c2f..0ba9fb2 100644
--- a/src/indexdb_hr.c
+++ b/src/indexdb_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb_hr.c 153955 2014-11-24 17:54:45Z twu $";
+static char rcsid[] = "$Id: indexdb_hr.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -702,6 +702,7 @@ point_one_shift (int *nentries, T this, Storedoligomer_T subst) {
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
+#if 0
if (this->offsetsstrm_access == ALLOCATED) {
ptr0 = this->offsetsstrm[subst];
end0 = this->offsetsstrm[subst+1];
@@ -710,6 +711,9 @@ point_one_shift (int *nentries, T this, Storedoligomer_T subst) {
end0 = Bigendian_convert_uint(this->offsetsstrm[subst+1]);
}
#else
+ abort();
+#endif
+#else
ptr0 = this->offsetsstrm[subst];
end0 = this->offsetsstrm[subst+1];
#endif
@@ -808,6 +812,7 @@ count_one_shift (T this, Storedoligomer_T subst, int nadjacent) {
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
+#if 0
if (this->offsetsstrm_access == ALLOCATED) {
ptr0 = this->offsetsstrm[subst];
end0 = this->offsetsstrm[subst+nadjacent];
@@ -816,6 +821,9 @@ count_one_shift (T this, Storedoligomer_T subst, int nadjacent) {
end0 = Bigendian_convert_uint(this->offsetsstrm[subst+nadjacent]);
}
#else
+ abort();
+#endif
+#else
ptr0 = this->offsetsstrm[subst];
end0 = this->offsetsstrm[subst+nadjacent];
#endif
@@ -841,6 +849,7 @@ count_one_shift (T this, Storedoligomer_T subst, int nadjacent) {
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
+#if 0
if (this->offsetsstrm_access == ALLOCATED) {
ptr0 = this->offsetsstrm[subst];
end0 = this->offsetsstrm[subst+nadjacent];
@@ -849,6 +858,9 @@ count_one_shift (T this, Storedoligomer_T subst, int nadjacent) {
end0 = Bigendian_convert_uint(this->offsetsstrm[subst+nadjacent]);
}
#else
+ abort();
+#endif
+#else
ptr0 = this->offsetsstrm[subst];
end0 = this->offsetsstrm[subst+nadjacent];
#endif
@@ -1836,6 +1848,7 @@ Indexdb_count_no_subst (T this, Storedoligomer_T oligo) {
if (this->compression_type == NO_COMPRESSION) {
#ifdef WORDS_BIGENDIAN
+#if 0
if (this->offsetsstrm_access == ALLOCATED) {
ptr0 = this->offsetsstrm[oligo];
end0 = this->offsetsstrm[oligo+1];
@@ -1844,6 +1857,9 @@ Indexdb_count_no_subst (T this, Storedoligomer_T oligo) {
end0 = Bigendian_convert_uint(this->offsetsstrm[oligo+1]);
}
#else
+ abort();
+#endif
+#else
ptr0 = this->offsetsstrm[oligo];
end0 = this->offsetsstrm[oligo+1];
#endif
diff --git a/src/mode.h b/src/mode.h
index 1592c99..2fbc598 100644
--- a/src/mode.h
+++ b/src/mode.h
@@ -1,8 +1,8 @@
-/* $Id: mode.h 48805 2011-09-30 20:20:26Z twu $ */
+/* $Id: mode.h 167592 2015-06-15 18:56:59Z twu $ */
#ifndef MODE_INCLUDED
#define MODE_INCLUDED
-typedef enum {STANDARD, CMET_STRANDED, CMET_NONSTRANDED, ATOI_STRANDED, ATOI_NONSTRANDED} Mode_T;
+typedef enum {STANDARD, CMET_STRANDED, CMET_NONSTRANDED, ATOI_STRANDED, ATOI_NONSTRANDED, TTOC_STRANDED, TTOC_NONSTRANDED} Mode_T;
#endif
diff --git a/src/oligoindex_hr.c b/src/oligoindex_hr.c
index f4ebc19..c541ce9 100644
--- a/src/oligoindex_hr.c
+++ b/src/oligoindex_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: oligoindex_hr.c 166641 2015-05-29 21:13:04Z twu $";
+static char rcsid[] = "$Id: oligoindex_hr.c 167575 2015-06-15 17:26:24Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -40,10 +40,18 @@ static char rcsid[] = "$Id: oligoindex_hr.c 166641 2015-05-29 21:13:04Z twu $";
#ifdef HAVE_SSE2
#define USE_SIMD_FOR_COUNTS 1
-#else
+#endif
+
+#if !defined(HAVE_SSE2)
#define INDIVIDUAL_SHIFTS 1
+#elif !defined(HAVE_SSE4_1)
+#define SIMD_MASK_THEN_STORE /* SSE2 only: _mm_extract_epi32 is an SSE4.1 intrinsic, so masked vectors are stored to an array first */
+#define EXTRACT(x,i) ((x)[(i)]) /* x is an array previously filled by _mm_store_si128, e.g. UINT4 _masked[4] */
+#else
+#define EXTRACT(x,i) _mm_extract_epi32(x,i) /* x is an __m128i held in a register */
#endif
+
#define THETADIFF1 20.0
#define THETADIFF2 20.0
#define REPOLIGOCOUNT 8
@@ -196,7 +204,7 @@ print_counts (__m128i x, char *label) {
#endif
-#if !defined(HAVE_SSE2) || defined(CHECK_ASSERTIONS)
+#if !defined(HAVE_SSE2) || !defined(HAVE_SSE4_1) || defined(CHECK_ASSERTIONS)
static const Genomecomp_T reverse_nt[] =
{0x0000,0x4000,0x8000,0xC000,0x1000,0x5000,0x9000,0xD000,
0x2000,0x6000,0xA000,0xE000,0x3000,0x7000,0xB000,0xF000,
@@ -10210,7 +10218,11 @@ store_fwdrev_simd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, C
static void
count_9mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -10253,41 +10265,49 @@ count_9mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("31 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("28 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("24 %04X => %d\n",masked,counts[masked]));
#endif
@@ -10328,41 +10348,49 @@ count_9mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("20 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("16 %04X => %d\n",masked,counts[masked]));
#endif
@@ -10406,41 +10434,49 @@ count_9mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("15 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("12 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("11 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("9 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("8 %04X => %d\n",masked,counts[masked]));
#endif
@@ -10481,41 +10517,49 @@ count_9mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("7 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("6 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("4 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("3 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("1 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("0 %04X => %d\n",masked,counts[masked]));
#endif
@@ -10665,7 +10709,11 @@ static int
store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -10748,9 +10796,13 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10759,7 +10811,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10768,7 +10820,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10777,7 +10829,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10788,9 +10840,13 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10799,7 +10855,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10808,7 +10864,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10817,7 +10873,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10903,9 +10959,13 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10914,7 +10974,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10923,7 +10983,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10932,7 +10992,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10943,9 +11003,13 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10954,7 +11018,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10963,7 +11027,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -10972,7 +11036,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11061,9 +11125,13 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11072,7 +11140,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11081,7 +11149,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11090,7 +11158,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11101,9 +11169,13 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11112,7 +11184,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11121,7 +11193,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11130,7 +11202,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11216,9 +11288,13 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11227,7 +11303,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11236,7 +11312,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11245,7 +11321,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11256,9 +11332,13 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11267,7 +11347,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11276,7 +11356,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11285,7 +11365,7 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11303,7 +11383,11 @@ store_9mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
static void
count_8mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -11342,37 +11426,45 @@ count_8mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("31 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("28 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("25 %04X => %d\n",masked,counts[masked]));
#endif
@@ -11417,41 +11509,49 @@ count_8mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("21 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("17 %04X => %d\n",masked,counts[masked]));
@@ -11496,37 +11596,45 @@ count_8mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("15 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("12 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("11 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("9 %04X => %d\n",masked,counts[masked]));
#endif
@@ -11571,41 +11679,49 @@ count_8mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("8 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("7 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("6 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("5 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("4 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("3 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("1 %04X => %d\n",masked,counts[masked]));
@@ -11760,7 +11876,11 @@ static int
store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -11841,9 +11961,13 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11853,7 +11977,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11863,7 +11987,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11873,7 +11997,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11885,9 +12009,13 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11897,7 +12025,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -11907,7 +12035,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12012,9 +12140,13 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12024,7 +12156,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12034,7 +12166,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12044,7 +12176,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12056,9 +12188,13 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12068,7 +12204,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12078,7 +12214,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12088,7 +12224,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12187,9 +12323,13 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12199,7 +12339,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12209,7 +12349,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12219,7 +12359,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12231,9 +12371,13 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12243,7 +12387,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12253,7 +12397,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12358,9 +12502,13 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12370,7 +12518,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12380,7 +12528,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12390,7 +12538,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12402,9 +12550,13 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12414,7 +12566,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12424,7 +12576,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12434,7 +12586,7 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -12463,7 +12615,11 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
static void
count_7mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -12498,33 +12654,41 @@ count_7mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("31 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("28 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("26 %04X => %d\n",masked,counts[masked]));
#endif
@@ -12573,53 +12737,65 @@ count_7mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("22 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("18 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("16 %04X => %d\n",masked,counts[masked]));
#endif
@@ -12655,33 +12831,41 @@ count_7mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("15 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("12 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("11 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("10 %04X => %d\n",masked,counts[masked]));
#endif
@@ -12730,53 +12914,65 @@ count_7mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("9 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("8 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("7 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("6 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("4 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("3 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("2 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("1 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("0 %04X => %d\n",masked,counts[masked]));
#endif
@@ -12927,7 +13123,11 @@ static int
store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -12992,9 +13192,13 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13003,7 +13207,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13012,7 +13216,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13021,7 +13225,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13032,9 +13236,13 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13043,7 +13251,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13147,9 +13355,13 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13158,7 +13370,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13167,7 +13379,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13176,7 +13388,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13187,9 +13399,13 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13198,7 +13414,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13207,7 +13423,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13216,7 +13432,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13227,9 +13443,13 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13238,7 +13458,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13309,9 +13529,13 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13320,7 +13544,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13329,7 +13553,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13338,7 +13562,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13349,9 +13573,13 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13360,7 +13588,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13464,9 +13692,13 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13475,7 +13707,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13484,7 +13716,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13493,7 +13725,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13504,9 +13736,13 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13515,7 +13751,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13524,7 +13760,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13533,7 +13769,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13544,9 +13780,13 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13555,7 +13795,7 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -13572,7 +13812,11 @@ store_7mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
static void
count_6mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -13603,21 +13847,25 @@ count_6mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("31 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("28 %04X => %d\n",masked,counts[masked]));
@@ -13675,57 +13923,69 @@ count_6mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("23 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("19 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("16 %04X => %d\n",masked,counts[masked]));
#endif
@@ -13757,21 +14017,25 @@ count_6mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("15 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("12 %04X => %d\n",masked,counts[masked]));
@@ -13829,57 +14093,69 @@ count_6mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("9 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("8 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("7 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("6 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("4 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("3 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("1 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("0 %04X => %d\n",masked,counts[masked]));
#endif
@@ -14031,7 +14307,11 @@ static int
store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -14087,9 +14367,13 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14098,7 +14382,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14107,7 +14391,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14116,7 +14400,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14239,9 +14523,13 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14250,7 +14538,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14259,7 +14547,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14268,7 +14556,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14279,9 +14567,13 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14290,7 +14582,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14299,7 +14591,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14308,7 +14600,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14319,9 +14611,13 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14330,7 +14626,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14339,7 +14635,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14401,9 +14697,13 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14412,7 +14712,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14421,7 +14721,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14430,7 +14730,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14553,9 +14853,13 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14564,7 +14868,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14573,7 +14877,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14582,7 +14886,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14593,9 +14897,13 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14604,7 +14912,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14613,7 +14921,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14622,7 +14930,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14633,9 +14941,13 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14644,7 +14956,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14653,7 +14965,7 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -14671,7 +14983,11 @@ store_6mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
static void
count_5mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -14698,21 +15014,25 @@ count_5mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("31 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("28 %04X => %d\n",masked,counts[masked]));
#endif
@@ -14769,61 +15089,73 @@ count_5mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("24 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("23 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("20 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("19 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("16 %04X => %d\n",masked,counts[masked]));
#endif
@@ -14851,21 +15183,25 @@ count_5mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("15 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("12 %04X => %d\n",masked,counts[masked]));
#endif
@@ -14922,61 +15258,73 @@ count_5mers_fwd (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, G
#else
_oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("11 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("9 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("8 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("7 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("6 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("4 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("3 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("1 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("0 %04X => %d\n",masked,counts[masked]));
#endif
@@ -15125,7 +15473,11 @@ static int
store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -15172,9 +15524,13 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15183,7 +15539,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15192,7 +15548,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15201,7 +15557,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15323,9 +15679,13 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15334,7 +15694,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15343,7 +15703,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15352,7 +15712,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15363,9 +15723,13 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15374,7 +15738,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15383,7 +15747,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15392,7 +15756,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15403,9 +15767,13 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15414,7 +15782,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15423,7 +15791,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15432,7 +15800,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15485,9 +15853,13 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15496,7 +15868,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15505,7 +15877,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15514,7 +15886,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15636,9 +16008,13 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15647,7 +16023,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15656,7 +16032,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15665,7 +16041,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15676,9 +16052,13 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15687,7 +16067,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15696,7 +16076,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15705,7 +16085,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15716,9 +16096,13 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15727,7 +16111,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15736,7 +16120,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -15745,7 +16129,7 @@ store_5mers_fwd (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -16160,12 +16544,15 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
#endif
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(nexthigh_rev == (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
+#endif
if (indexsize == 9) {
count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
@@ -16215,12 +16602,15 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
#endif
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(nexthigh_rev == (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
+#endif
if (indexsize == 9) {
count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
@@ -16281,19 +16671,20 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
#endif
nexthigh_rev = high0_rev;
- high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
#ifdef HAVE_SSE4_1
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
+ /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
+ /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+
temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
next = _mm_shuffle_epi32(temp,0x39);
#else
- low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
- high1_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
- low1_rev = (unsigned int) _mm_extract_epi32(current,3);
- assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+ low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
#endif
@@ -16339,10 +16730,13 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
count_9mers_fwd(counts,high0_rev,low0_rev,nexthigh_rev);
}
@@ -16391,19 +16785,20 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
#endif
nexthigh_rev = high0_rev;
- high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
#ifdef HAVE_SSE4_1
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
+ /* high1_rev = (unsigned int) _mm_extract_epi32(current,2);*/
+ /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+
temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
next = _mm_shuffle_epi32(temp,0x39);
#else
- low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
- high1_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
- low1_rev = (unsigned int) _mm_extract_epi32(current,3);
- assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+ low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
#endif
@@ -16449,10 +16844,13 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
count_8mers_fwd(counts,high0_rev,low0_rev,nexthigh_rev);
}
@@ -16501,19 +16899,20 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
#endif
nexthigh_rev = high0_rev;
- high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
#ifdef HAVE_SSE4_1
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
+ /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
+ /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+
temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
next = _mm_shuffle_epi32(temp,0x39);
#else
- low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
- high1_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
- low1_rev = (unsigned int) _mm_extract_epi32(current,3);
- assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+ low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
#endif
@@ -16559,10 +16958,13 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
count_7mers_fwd(counts,high0_rev,low0_rev,nexthigh_rev);
}
@@ -16611,19 +17013,20 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
#endif
nexthigh_rev = high0_rev;
- high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
#ifdef HAVE_SSE4_1
+ high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+ /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
+ /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
+ /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+
temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
next = _mm_shuffle_epi32(temp,0x39);
#else
- low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
- high1_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
- low1_rev = (unsigned int) _mm_extract_epi32(current,3);
- assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+ low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
#endif
@@ -16669,10 +17072,13 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
count_6mers_fwd(counts,high0_rev,low0_rev,nexthigh_rev);
}
@@ -16721,19 +17127,19 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
+ /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
+ /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
-#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
next = _mm_shuffle_epi32(temp,0x39);
#else
- low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
- high1_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
- low1_rev = (unsigned int) _mm_extract_epi32(current,3);
- assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+ low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
#endif
@@ -16779,10 +17185,13 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
count_5mers_fwd(counts,high0_rev,low0_rev,nexthigh_rev);
}
@@ -16826,10 +17235,13 @@ count_positions_fwd_simd (Count_T *counts, int indexsize,
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
if (indexsize == 9) {
count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
@@ -17253,12 +17665,15 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
#endif
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(nexthigh_rev == (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
+#endif
if (indexsize == 9) {
chrpos = store_9mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
@@ -17308,12 +17723,15 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
#endif
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(nexthigh_rev == (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
+#endif
if (indexsize == 9) {
chrpos = store_9mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
@@ -17374,19 +17792,19 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
+ /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
+ /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
-#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
next = _mm_shuffle_epi32(temp,0x39);
#else
- low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
- high1_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
- low1_rev = (unsigned int) _mm_extract_epi32(current,3);
- assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+ low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
#endif
@@ -17428,10 +17846,13 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
chrpos = store_9mers_fwd(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
}
@@ -17480,19 +17901,19 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
+ /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
+ /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
-#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
next = _mm_shuffle_epi32(temp,0x39);
#else
- low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
- high1_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
- low1_rev = (unsigned int) _mm_extract_epi32(current,3);
- assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+ low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
#endif
@@ -17534,10 +17955,13 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
chrpos = store_8mers_fwd(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
}
@@ -17586,19 +18010,19 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
+ /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
+ /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
-#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
next = _mm_shuffle_epi32(temp,0x39);
#else
- low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
- high1_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
- low1_rev = (unsigned int) _mm_extract_epi32(current,3);
- assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+ low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
#endif
@@ -17640,10 +18064,13 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
chrpos = store_7mers_fwd(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
}
@@ -17692,19 +18119,19 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
+ /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
+ /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
-#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
next = _mm_shuffle_epi32(temp,0x39);
#else
- low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
- high1_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
- low1_rev = (unsigned int) _mm_extract_epi32(current,3);
- assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+ low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
#endif
@@ -17746,10 +18173,13 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
chrpos = store_6mers_fwd(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
}
@@ -17798,19 +18228,19 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
+ /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
+ /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
+ /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
-#ifdef HAVE_SSE4_1
temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
next = _mm_shuffle_epi32(temp,0x39);
#else
- low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
- high1_rev = (unsigned int) _mm_extract_epi32(current,2);
- assert(high1_rev == (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16));
- low1_rev = (unsigned int) _mm_extract_epi32(current,3);
- assert(low1_rev == (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16));
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+ high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+ low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
#endif
@@ -17852,10 +18282,13 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
chrpos = store_5mers_fwd(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
}
@@ -17899,10 +18332,13 @@ store_positions_fwd_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
#endif
nexthigh_rev = high0_rev;
+#ifdef HAVE_SSE4_1
high0_rev = (unsigned int) _mm_extract_epi32(current,0);
- assert(high0_rev == (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16));
low0_rev = (unsigned int) _mm_extract_epi32(current,1);
- assert(low0_rev == (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16));
+#else
+ high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+ low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+#endif
if (indexsize == 9) {
chrpos = store_9mers_fwd_partial(chrpos,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
@@ -18516,7 +18952,11 @@ store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positi
static void
count_9mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -18556,41 +18996,49 @@ count_9mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("0 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("1 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("3 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("4 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("6 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("7 %04X => %d\n",masked,counts[masked]));
#endif
@@ -18634,41 +19082,49 @@ count_9mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("8 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("9 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("11 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("12 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("15 %04X => %d\n",masked,counts[masked]));
#endif
@@ -18709,41 +19165,49 @@ count_9mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("16 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("19 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("23 %04X => %d\n",masked,counts[masked]));
#endif
@@ -18787,41 +19251,49 @@ count_9mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("27 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("31 %04X => %d\n",masked,counts[masked]));
#endif
@@ -18970,7 +19442,11 @@ static int
store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -19050,9 +19526,13 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19061,7 +19541,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19070,7 +19550,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19079,7 +19559,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19090,9 +19570,13 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19101,7 +19585,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19110,7 +19594,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19119,7 +19603,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19208,9 +19692,13 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19219,7 +19707,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19228,7 +19716,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19237,7 +19725,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19248,9 +19736,13 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19259,7 +19751,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19268,7 +19760,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19277,7 +19769,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19363,9 +19855,13 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19374,7 +19870,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19383,7 +19879,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19392,7 +19888,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19403,9 +19899,13 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19414,7 +19914,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19423,7 +19923,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19432,7 +19932,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19521,9 +20021,13 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19532,7 +20036,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19541,7 +20045,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19550,7 +20054,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19561,9 +20065,13 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
_masked = _mm_and_si128(_oligo, mask9);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19572,7 +20080,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19581,7 +20089,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19590,7 +20098,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -19607,7 +20115,11 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
static void
count_8mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -19651,48 +20163,56 @@ count_8mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == (low_rc & MASK8));
counts[masked] += 1;
debug(printf("0 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((low_rc >> 2) & MASK8));
counts[masked] += 1;
debug(printf("1 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((low_rc >> 4) & MASK8));
counts[masked] += 1;
debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((low_rc >> 6) & MASK8));
counts[masked] += 1;
debug(printf("3 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == ((low_rc >> 8) & MASK8));
counts[masked] += 1;
debug(printf("4 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((low_rc >> 10) & MASK8));
counts[masked] += 1;
debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((low_rc >> 12) & MASK8));
counts[masked] += 1;
debug(printf("6 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((low_rc >> 14) & MASK8));
counts[masked] += 1;
debug(printf("7 %04X => %d\n",masked,counts[masked]));
@@ -19738,43 +20258,51 @@ count_8mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == (oligo & MASK8));
counts[masked] += 1;
debug(printf("9 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((oligo >> 2) & MASK8));
counts[masked] += 1;
debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((oligo >> 4) & MASK8));
counts[masked] += 1;
debug(printf("11 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((oligo >> 6) & MASK8));
counts[masked] += 1;
debug(printf("12 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == ((oligo >> 8) & MASK8));
counts[masked] += 1;
debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((oligo >> 10) & MASK8));
counts[masked] += 1;
debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((oligo >> 12) & MASK8));
counts[masked] += 1;
debug(printf("15 %04X => %d\n",masked,counts[masked]));
@@ -19820,48 +20348,56 @@ count_8mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == (high_rc & MASK8));
counts[masked] += 1;
debug(printf("16 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((high_rc >> 2) & MASK8));
counts[masked] += 1;
debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((high_rc >> 4) & MASK8));
counts[masked] += 1;
debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((high_rc >> 6) & MASK8));
counts[masked] += 1;
debug(printf("19 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == ((high_rc >> 8) & MASK8));
counts[masked] += 1;
debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((high_rc >> 10) & MASK8));
counts[masked] += 1;
debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((high_rc >> 12) & MASK8));
counts[masked] += 1;
debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((high_rc >> 14) & MASK8));
counts[masked] += 1;
debug(printf("23 %04X => %d\n",masked,counts[masked]));
@@ -19907,43 +20443,51 @@ count_8mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == (oligo & MASK8));
counts[masked] += 1;
debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((oligo >> 2) & MASK8));
counts[masked] += 1;
debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((oligo >> 4) & MASK8));
counts[masked] += 1;
debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((oligo >> 6) & MASK8));
counts[masked] += 1;
debug(printf("28 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == ((oligo >> 8) & MASK8));
counts[masked] += 1;
debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((oligo >> 10) & MASK8));
counts[masked] += 1;
debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((oligo >> 12) & MASK8));
counts[masked] += 1;
debug(printf("31 %04X => %d\n",masked,counts[masked]));
@@ -20096,7 +20640,11 @@ static int
store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -20185,9 +20733,13 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == (low_rc & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20197,7 +20749,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((low_rc >> 2) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20207,7 +20759,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((low_rc >> 4) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20217,7 +20769,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((low_rc >> 6) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20229,9 +20781,13 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == ((low_rc >> 8) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20241,7 +20797,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((low_rc >> 10) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20251,7 +20807,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((low_rc >> 12) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20261,7 +20817,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((low_rc >> 14) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20352,9 +20908,13 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == (oligo & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20364,7 +20924,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((oligo >> 2) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20374,7 +20934,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((oligo >> 4) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20384,7 +20944,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((oligo >> 6) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20396,9 +20956,13 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == ((oligo >> 8) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20408,7 +20972,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((oligo >> 10) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20418,7 +20982,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((oligo >> 12) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20514,9 +21078,13 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == (high_rc & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20526,7 +21094,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((high_rc >> 2) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20536,7 +21104,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((high_rc >> 4) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20546,7 +21114,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((high_rc >> 6) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20558,9 +21126,13 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == ((high_rc >> 8) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20570,7 +21142,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((high_rc >> 10) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20580,7 +21152,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((high_rc >> 12) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20590,7 +21162,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((high_rc >> 14) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20681,9 +21253,13 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == (oligo & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20693,7 +21269,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((oligo >> 2) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20703,7 +21279,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((oligo >> 4) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20713,7 +21289,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
assert(masked == ((oligo >> 6) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20725,9 +21301,13 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
_masked = _mm_and_si128(_oligo, mask8);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
assert(masked == ((oligo >> 8) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20737,7 +21317,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
assert(masked == ((oligo >> 10) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20747,7 +21327,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
assert(masked == ((oligo >> 12) & MASK8));
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
@@ -20766,7 +21346,11 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
static void
count_7mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -20814,53 +21398,65 @@ count_7mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("0 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("1 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("3 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("4 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("6 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("7 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("8 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("9 %04X => %d\n",masked,counts[masked]));
#endif
@@ -20896,33 +21492,41 @@ count_7mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("11 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("12 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("13 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("15 %04X => %d\n",masked,counts[masked]));
#endif
@@ -20971,53 +21575,65 @@ count_7mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("16 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("19 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("23 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("25 %04X => %d\n",masked,counts[masked]));
#endif
@@ -21053,33 +21669,41 @@ count_7mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("29 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("31 %04X => %d\n",masked,counts[masked]));
#endif
@@ -21229,7 +21853,11 @@ static Chrpos_T
store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -21327,9 +21955,13 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21338,7 +21970,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21347,7 +21979,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21356,7 +21988,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21367,9 +21999,13 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21378,7 +22014,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21387,7 +22023,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21396,7 +22032,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21407,9 +22043,13 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21418,7 +22058,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21489,9 +22129,13 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21500,7 +22144,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21509,7 +22153,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21518,7 +22162,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21529,9 +22173,13 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21540,7 +22188,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21645,9 +22293,13 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21656,7 +22308,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21665,7 +22317,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21674,7 +22326,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21685,9 +22337,13 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21696,7 +22352,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21705,7 +22361,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21714,7 +22370,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21725,9 +22381,13 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21736,7 +22396,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21807,9 +22467,13 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21818,7 +22482,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21827,7 +22491,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21836,7 +22500,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21847,9 +22511,13 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
_masked = _mm_and_si128(_oligo, mask7);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21858,7 +22526,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -21875,7 +22543,11 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
static void
count_6mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -21927,57 +22599,69 @@ count_6mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("0 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("1 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("3 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("4 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("6 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("7 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("8 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("9 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("10 %04X => %d\n",masked,counts[masked]));
#endif
@@ -22009,21 +22693,25 @@ count_6mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("11 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("12 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("14 %04X => %d\n",masked,counts[masked]));
@@ -22081,57 +22769,69 @@ count_6mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("16 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("19 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("23 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("26 %04X => %d\n",masked,counts[masked]));
#endif
@@ -22163,21 +22863,25 @@ count_6mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("27 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("30 %04X => %d\n",masked,counts[masked]));
@@ -22332,7 +23036,11 @@ static int
store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -22439,9 +23147,13 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22450,7 +23162,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22459,7 +23171,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22468,7 +23180,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22479,9 +23191,13 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22490,7 +23206,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22499,7 +23215,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22508,7 +23224,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22519,9 +23235,13 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22530,7 +23250,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22539,7 +23259,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22601,9 +23321,13 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22612,7 +23336,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22621,7 +23345,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22630,7 +23354,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22753,9 +23477,13 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22764,7 +23492,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22773,7 +23501,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22782,7 +23510,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22793,9 +23521,13 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22804,7 +23536,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22813,7 +23545,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22822,7 +23554,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22833,9 +23565,13 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22844,7 +23580,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22853,7 +23589,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22915,9 +23651,13 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
_masked = _mm_and_si128(_oligo, mask6);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22926,7 +23666,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22935,7 +23675,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22944,7 +23684,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -22971,7 +23711,11 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
static void
count_5mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -23027,61 +23771,73 @@ count_5mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("0 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("1 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("2 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("3 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("4 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("5 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("6 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("7 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("8 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("9 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("10 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("11 %04X => %d\n",masked,counts[masked]));
#endif
@@ -23109,21 +23865,25 @@ count_5mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("12 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("13 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("14 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("15 %04X => %d\n",masked,counts[masked]));
#endif
@@ -23180,61 +23940,73 @@ count_5mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("16 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("17 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("18 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("19 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("20 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("21 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("22 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("23 %04X => %d\n",masked,counts[masked]));
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("24 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("25 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("26 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("27 %04X => %d\n",masked,counts[masked]));
#endif
@@ -23262,21 +24034,25 @@ count_5mers_rev (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Gen
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
counts[masked] += 1;
debug(printf("28 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
counts[masked] += 1;
debug(printf("29 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
counts[masked] += 1;
debug(printf("30 %04X => %d\n",masked,counts[masked]));
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
counts[masked] += 1;
debug(printf("31 %04X => %d\n",masked,counts[masked]));
#endif
@@ -23426,7 +24202,11 @@ static int
store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Count_T *counts,
Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
Genomecomp_T masked, oligo;
-#ifndef INDIVIDUAL_SHIFTS
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+ UINT4 _masked[4] __attribute__ ((aligned (16)));
+ __m128i _oligo;
+#else
__m128i _oligo, _masked;
#endif
@@ -23542,9 +24322,13 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23553,7 +24337,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23562,7 +24346,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23571,7 +24355,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23582,9 +24366,13 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23593,7 +24381,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23602,7 +24390,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23611,7 +24399,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23622,9 +24410,13 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23633,7 +24425,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23642,7 +24434,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23651,7 +24443,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23704,9 +24496,13 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23715,7 +24511,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23724,7 +24520,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23733,7 +24529,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23855,9 +24651,13 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23866,7 +24666,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23875,7 +24675,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23884,7 +24684,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23895,9 +24695,13 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23906,7 +24710,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23915,7 +24719,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23924,7 +24728,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23935,9 +24739,13 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
_oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23946,7 +24754,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23955,7 +24763,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -23964,7 +24772,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -24017,9 +24825,13 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
#else
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+ _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
_masked = _mm_and_si128(_oligo, mask5);
+#endif
- masked = _mm_extract_epi32(_masked,0);
+ masked = EXTRACT(_masked,0);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -24028,7 +24840,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,1);
+ masked = EXTRACT(_masked,1);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -24037,7 +24849,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,2);
+ masked = EXTRACT(_masked,2);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -24046,7 +24858,7 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T **pointers, Chrpos_T **positions, Cou
}
}
- masked = _mm_extract_epi32(_masked,3);
+ masked = EXTRACT(_masked,3);
if (counts[masked]) {
if (pointers[masked] == positions[masked]) {
counts[masked] = 0;
@@ -24509,12 +25321,17 @@ count_positions_rev_simd (Count_T *counts, int indexsize,
current = _mm_xor_si128(current,invert3);
nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
+ /* high0_rc = _mm_extract_epi32(current,2); */
+ /* low1_rc = _mm_extract_epi32(current,1); */
+ /* high1_rc = _mm_extract_epi32(current,0); */
+
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- high0_rc = _mm_extract_epi32(current,2);
- low1_rc = _mm_extract_epi32(current,1);
- high1_rc = _mm_extract_epi32(current,0);
+ high0_rc = ~high0;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
+
next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
@@ -24592,12 +25409,17 @@ count_positions_rev_simd (Count_T *counts, int indexsize,
current = _mm_xor_si128(current,invert3);
nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
+ /* high0_rc = _mm_extract_epi32(current,2); */
+ /* low1_rc = _mm_extract_epi32(current,1); */
+ /* high1_rc = _mm_extract_epi32(current,0); */
+
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- high0_rc = _mm_extract_epi32(current,2);
- low1_rc = _mm_extract_epi32(current,1);
- high1_rc = _mm_extract_epi32(current,0);
+ high0_rc = ~high0;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
+
next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
@@ -24675,12 +25497,17 @@ count_positions_rev_simd (Count_T *counts, int indexsize,
current = _mm_xor_si128(current,invert3);
nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
+ /* high0_rc = _mm_extract_epi32(current,2); */
+ /* low1_rc = _mm_extract_epi32(current,1); */
+ /* high1_rc = _mm_extract_epi32(current,0); */
+
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- high0_rc = _mm_extract_epi32(current,2);
- low1_rc = _mm_extract_epi32(current,1);
- high1_rc = _mm_extract_epi32(current,0);
+ high0_rc = ~high0;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
+
next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
@@ -24758,12 +25585,17 @@ count_positions_rev_simd (Count_T *counts, int indexsize,
current = _mm_xor_si128(current,invert3);
nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
+ /* high0_rc = _mm_extract_epi32(current,2); */
+ /* low1_rc = _mm_extract_epi32(current,1); */
+ /* high1_rc = _mm_extract_epi32(current,0); */
+
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- high0_rc = _mm_extract_epi32(current,2);
- low1_rc = _mm_extract_epi32(current,1);
- high1_rc = _mm_extract_epi32(current,0);
+ high0_rc = ~high0;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
+
next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
@@ -24841,12 +25673,17 @@ count_positions_rev_simd (Count_T *counts, int indexsize,
current = _mm_xor_si128(current,invert3);
nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
+ /* high0_rc = _mm_extract_epi32(current,2); */
+ /* low1_rc = _mm_extract_epi32(current,1); */
+ /* high1_rc = _mm_extract_epi32(current,0); */
+
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- high0_rc = _mm_extract_epi32(current,2);
- low1_rc = _mm_extract_epi32(current,1);
- high1_rc = _mm_extract_epi32(current,0);
+ high0_rc = ~high0;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
+
next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
@@ -25391,12 +26228,17 @@ store_positions_rev_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
current = _mm_xor_si128(current,invert3);
nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
+ /* high0_rc = _mm_extract_epi32(current,2); */
+ /* low1_rc = _mm_extract_epi32(current,1); */
+ /* high1_rc = _mm_extract_epi32(current,0); */
+
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- high0_rc = _mm_extract_epi32(current,2);
- low1_rc = _mm_extract_epi32(current,1);
- high1_rc = _mm_extract_epi32(current,0);
+ high0_rc = ~high0;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
+
next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
@@ -25470,12 +26312,17 @@ store_positions_rev_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
current = _mm_xor_si128(current,invert3);
nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
+ /* high0_rc = _mm_extract_epi32(current,2); */
+ /* low1_rc = _mm_extract_epi32(current,1); */
+ /* high1_rc = _mm_extract_epi32(current,0); */
+
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- high0_rc = _mm_extract_epi32(current,2);
- low1_rc = _mm_extract_epi32(current,1);
- high1_rc = _mm_extract_epi32(current,0);
+ high0_rc = ~high0;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
+
next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
@@ -25549,12 +26396,17 @@ store_positions_rev_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
current = _mm_xor_si128(current,invert3);
nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
+ /* high0_rc = _mm_extract_epi32(current,2); */
+ /* low1_rc = _mm_extract_epi32(current,1); */
+ /* high1_rc = _mm_extract_epi32(current,0); */
+
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- high0_rc = _mm_extract_epi32(current,2);
- low1_rc = _mm_extract_epi32(current,1);
- high1_rc = _mm_extract_epi32(current,0);
+ high0_rc = ~high0;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
+
next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
@@ -25628,12 +26480,17 @@ store_positions_rev_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
current = _mm_xor_si128(current,invert3);
nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
+ /* high0_rc = _mm_extract_epi32(current,2); */
+ /* low1_rc = _mm_extract_epi32(current,1); */
+ /* high1_rc = _mm_extract_epi32(current,0); */
+
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- high0_rc = _mm_extract_epi32(current,2);
- low1_rc = _mm_extract_epi32(current,1);
- high1_rc = _mm_extract_epi32(current,0);
+ high0_rc = ~high0;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
+
next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
@@ -25707,12 +26564,17 @@ store_positions_rev_simd (Chrpos_T **pointers, Chrpos_T **positions, Count_T *co
current = _mm_xor_si128(current,invert3);
nextlow_rc = ~nextlow;
#ifdef HAVE_SSE4_1
+ /* high0_rc = _mm_extract_epi32(current,2); */
+ /* low1_rc = _mm_extract_epi32(current,1); */
+ /* high1_rc = _mm_extract_epi32(current,0); */
+
temp = _mm_insert_epi32(current,nextlow_rc,0x03);
next = _mm_shuffle_epi32(temp,0x93);
#else
- high0_rc = _mm_extract_epi32(current,2);
- low1_rc = _mm_extract_epi32(current,1);
- high1_rc = _mm_extract_epi32(current,0);
+ high0_rc = ~high0;
+ low1_rc = ~low1;
+ high1_rc = ~high1;
+
next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
#endif
diff --git a/src/sarray-read.c b/src/sarray-read.c
index 5582b1a..46c0cdc 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 166828 2015-06-03 06:56:12Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 170516 2015-07-23 23:15:12Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -8,6 +8,14 @@ static char rcsid[] = "$Id: sarray-read.c 166828 2015-06-03 06:56:12Z twu $";
#include "sarray-read.h"
+#ifdef WORDS_BIGENDIAN
+#define CONVERT(x) Bigendian_convert_uint(x)
+#include "bigendian.h"
+#else
+#define CONVERT(x) (x)
+#include "littleendian.h"
+#endif
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -38,20 +46,13 @@ static char rcsid[] = "$Id: sarray-read.c 166828 2015-06-03 06:56:12Z twu $";
#include "junction.h"
#include "stage3hr.h"
-#ifdef USE_CSA
-/* For FREAD_UINT */
-#ifdef WORDS_BIGENDIAN
-#include "bigendian.h"
-#else
-#include "littleendian.h"
-#endif
-#endif
-
-#ifdef HAVE_SSE2
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
#include <emmintrin.h>
#endif
-#ifdef HAVE_SSSE3
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSSE3)
+#else
#include <tmmintrin.h>
#endif
#ifdef HAVE_POPCNT
@@ -274,7 +275,7 @@ struct T {
Sarrayptr_T indexG;
Sarrayptr_T indexT;
Sarrayptr_T indexX;
-#ifdef HAVE_SSE2
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
__m128i indices0;
UINT4 index0[16];
#endif
@@ -360,11 +361,11 @@ static Chrpos_T *splicedists;
static int nsplicesites;
-#ifdef HAVE_SSE2
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
static __m128i epi32_convert; /* For converting unsigned ints to signed ints */
#endif
-#if defined(HAVE_SSE2) && defined(USE_SHUFFLE_MASK)
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN) && defined(USE_SHUFFLE_MASK)
static __m128i shuffle_mask16[16];
#endif
@@ -391,7 +392,11 @@ sarray_search_char (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, char desired_ch
#else
mid = low + ((high - low) / 2);
#endif
+#ifdef WORDS_BIGENDIAN
+ pos = Bigendian_convert_uint(SA[mid]);
+#else
pos = SA[mid];
+#endif
c = Genome_get_char_lex(genome,pos,n,chartable);
if (desired_char > c) {
low = mid + 1;
@@ -415,7 +420,11 @@ sarray_search_char (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, char desired_ch
/* This does not work for ceiling */
mid = low + ((high - low) / 2);
#endif
+#ifdef WORDS_BIGENDIAN
+ pos = Bigendian_convert_uint(SA[mid]);
+#else
pos = SA[mid];
+#endif
c = Genome_get_char_lex(genome,pos,n,chartable);
if (desired_char >= c) {
low = mid;
@@ -456,6 +465,9 @@ Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
} else if (mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
conversion_fwd['A'] = 'G'; /* AG */
conversion_rev['T'] = 'C'; /* TC */
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ conversion_fwd['T'] = 'C'; /* TC */
+ conversion_rev['A'] = 'G'; /* AG */
}
chromosome_iit = chromosome_iit_in;
@@ -496,11 +508,11 @@ Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
printf("T => %u %u\n",sarray->initindexi[3],sarray->initindexj[3]);
#endif
-#ifdef HAVE_SSE2
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
epi32_convert = _mm_set1_epi32(2147483648); /* 2^31 */
#endif
-#if defined(HAVE_SSE2) && defined(USE_SHUFFLE_MASK)
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN) && defined(USE_SHUFFLE_MASK)
/* Used by fill_positions_filtered_first */
shuffle_mask16[0] = _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1);
shuffle_mask16[1] = _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 3, 2, 1, 0);
@@ -572,6 +584,12 @@ Sarray_shmem_remove (char *dir, char *fileroot, char *snps_root, Mode_T mode, bo
} else {
mode_prefix = ".a2itc.";
}
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ if (fwdp == true) {
+ mode_prefix = ".a2itc.";
+ } else {
+ mode_prefix = ".a2iag.";
+ }
}
sarrayfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sarray")+1,sizeof(char));
@@ -648,7 +666,7 @@ csa_lookup (T sarray, Sarrayptr_T i) {
expected_i = sarray->csa[i];
#endif
-#ifdef HAVE_SSE2
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
converted = _mm_sub_epi32(_mm_set1_epi32(i),epi32_convert);
cmp = _mm_cmpgt_epi32(converted,sarray->indices0); /* To use cmpgt, sarray->indices0 is shifted down by 1 */
matchbits = _mm_movemask_ps(_mm_castsi128_ps(cmp));
@@ -694,6 +712,10 @@ csa_lookup (T sarray, Sarrayptr_T i) {
}
}
+#elif defined(WORDS_BIGENDIAN)
+
+#define csa_lookup(sarray,i) Bigendian_convert_uint(sarray->array[i])
+
#else
#define csa_lookup(sarray,i) sarray->array[i]
@@ -757,6 +779,12 @@ Sarray_new (char *dir, char *fileroot, char *snps_root, Access_mode_T sarray_acc
} else {
mode_prefix = ".a2itc.";
}
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ if (fwdp == true) {
+ mode_prefix = ".a2itc.";
+ } else {
+ mode_prefix = ".a2iag.";
+ }
}
/* Old format */
@@ -894,7 +922,7 @@ Sarray_new (char *dir, char *fileroot, char *snps_root, Access_mode_T sarray_acc
fclose(fp);
FREE(filename);
-#ifdef HAVE_SSE2
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
new->indices0 = _mm_sub_epi32(_mm_set_epi32(new->indexX,new->indexT,new->indexG,new->indexC),
_mm_set1_epi32(2147483648) /* 2^31, same as epi_convert */);
/* because (a >= indices) is equivalent to (a > indices - 1) */
@@ -1406,17 +1434,29 @@ sarray_search_init (char *query, int querylength, int queryoffset, Compress_T qu
debug1(printf("low %u, high %u => mid %u\n",low,high,mid));
nmatches_mid = (nmatches_low < nmatches_high) ? nmatches_low : nmatches_high;
+#ifdef WORDS_BIGENDIAN
+ fasti = nmatches_mid +
+ (Univcoord_T) Genome_consecutive_matches_rightward(query_compress,/*left*/Bigendian_convert_uint(sarray->array[mid])-queryoffset,
+ /*pos5*/queryoffset+nmatches_mid,
+ /*pos3*/queryoffset+querylength,plusp,genestrand,first_read_p);
+ pos = Bigendian_convert_uint(sarray->array[mid]) + fasti;
+#else
fasti = nmatches_mid +
(Univcoord_T) Genome_consecutive_matches_rightward(query_compress,/*left*/sarray->array[mid]-queryoffset,
/*pos5*/queryoffset+nmatches_mid,
/*pos3*/queryoffset+querylength,plusp,genestrand,first_read_p);
pos = sarray->array[mid] + fasti;
+#endif
c = Genome_get_char_lex(genome,pos,sarray->n,chartable);
if (fasti == (Univcoord_T) querylength || c > query[fasti]) {
high = mid;
/* nmatches_high = (sarray->lcp[mid] < nmatches_mid) ? sarray->lcp[mid] : nmatches_mid; */
+#ifdef WORDS_BIGENDIAN
+ sa_mid = Bigendian_convert_uint(sarray->array[mid]);
+#else
sa_mid = sarray->array[mid];
+#endif
lcp_mid = Bitpack64_read_one(sa_mid,sarray->plcp_ptrs,sarray->plcp_comp) - sa_mid;
#ifdef USE_LCP
if (lcp_mid != sarray->lcp[mid]) {
@@ -1427,7 +1467,11 @@ sarray_search_init (char *query, int querylength, int queryoffset, Compress_T qu
} else {
low = mid;
/* nmatches_low = (sarray->lcp[low] < nmatches_mid) ? sarray->lcp[low] : nmatches_mid; */
+#ifdef WORDS_BIGENDIAN
+ sa_low = Bigendian_convert_uint(sarray->array[low]);
+#else
sa_low = sarray->array[low];
+#endif
lcp_low = Bitpack64_read_one(sa_low,sarray->plcp_ptrs,sarray->plcp_comp) - sa_low;
#ifdef USE_LCP
if (lcp_low != sarray->lcp[low]) {
@@ -1475,17 +1519,29 @@ sarray_search_final (char *query, int querylength, int queryoffset, Compress_T q
debug1(printf("low %u, high %u => mid %u\n",low,high,mid));
nmatches_mid = (nmatches_low < nmatches_high) ? nmatches_low : nmatches_high;
+#ifdef WORDS_BIGENDIAN
+ fasti = nmatches_mid +
+ (Univcoord_T) Genome_consecutive_matches_rightward(query_compress,/*left*/Bigendian_convert_uint(sarray->array[mid])-queryoffset,
+ /*pos5*/queryoffset+nmatches_mid,
+ /*pos3*/queryoffset+querylength,plusp,genestrand,first_read_p);
+ pos = Bigendian_convert_uint(sarray->array[mid]) + fasti;
+#else
fasti = nmatches_mid +
(Univcoord_T) Genome_consecutive_matches_rightward(query_compress,/*left*/sarray->array[mid]-queryoffset,
/*pos5*/queryoffset+nmatches_mid,
/*pos3*/queryoffset+querylength,plusp,genestrand,first_read_p);
pos = sarray->array[mid] + fasti;
+#endif
c = Genome_get_char_lex(genome,pos,sarray->n,chartable);
if (fasti == (Univcoord_T) querylength || c < query[fasti]) {
low = mid;
/* nmatches_low = (sarray->lcp[low] < nmatches_mid) ? sarray->lcp[low] : nmatches_mid; */
+#ifdef WORDS_BIGENDIAN
+ sa_low = Bigendian_convert_uint(sarray->array[low]);
+#else
sa_low = sarray->array[low];
+#endif
lcp_low = Bitpack64_read_one(sa_low,sarray->plcp_ptrs,sarray->plcp_comp) - sa_low;
#ifdef USE_LCP
if (lcp_low != sarray->lcp[low]) {
@@ -1496,7 +1552,11 @@ sarray_search_final (char *query, int querylength, int queryoffset, Compress_T q
} else {
high = mid;
/* nmatches_high = (sarray->lcp[mid] < nmatches_mid) ? sarray->lcp[mid] : nmatches_mid; */
+#ifdef WORDS_BIGENDIAN
+ sa_mid = Bigendian_convert_uint(sarray->array[mid]);
+#else
sa_mid = sarray->array[mid];
+#endif
lcp_mid = Bitpack64_read_one(sa_mid,sarray->plcp_ptrs,sarray->plcp_comp) - sa_mid;
#ifdef USE_LCP
if (lcp_mid != sarray->lcp[mid]) {
@@ -1593,6 +1653,7 @@ static bool
get_child_given_first (Sarrayptr_T *l, Sarrayptr_T *r, Sarrayptr_T i, Sarrayptr_T j, char desired_char,
T sarray, unsigned char *lcpchilddc, UINT4 lcp_whole, UINT4 nextl) {
char c1, c2;
+ UINT4 child_next;
debug2(printf("Getting children for l-interval from %u to %u, char %c\n",i,j,desired_char));
@@ -1632,20 +1693,28 @@ get_child_given_first (Sarrayptr_T *l, Sarrayptr_T *r, Sarrayptr_T i, Sarrayptr_
/* Test for child[i] being down: lcp[child[i]] > lcp[i] */
/* Test for child[i] being next_lindex: lcp[child[i]] == lcp[i] */
/* Test middle children */
- while (nextl < j && Bytecoding_lcpchilddc_lcp_next(nextl,/*bytes*/lcpchilddc,sarray->child_guide,sarray->child_exceptions,
+ while (nextl < j && Bytecoding_lcpchilddc_lcp_next(&child_next,nextl,/*bytes*/lcpchilddc,sarray->child_guide,sarray->child_exceptions,
sarray->child_guide_interval,sarray->lcp_exceptions,sarray->n_lcp_exceptions) == lcp_whole) {
/* Already tested for desired_char < c2 */
if (desired_char == c2) {
*l = nextl;
+#if 0
*r = Bytecoding_lcpchilddc_child_next(nextl,lcpchilddc,sarray->child_guide,sarray->child_exceptions,
sarray->child_guide_interval) - 1; /* child[nextl] - 1 */
+#else
+ *r = child_next - 1;
+#endif
debug2(printf("Child: %u to %u, c2 %c\n",nextl,*r,c2));
debug2(printf("Returning true\n\n"));
return true;
} else {
debug2(printf("Child: %u",nextl));
+#if 0
nextl = Bytecoding_lcpchilddc_child_next(nextl,lcpchilddc,sarray->child_guide,sarray->child_exceptions,
sarray->child_guide_interval); /* child[nextl] */
+#else
+ nextl = child_next;
+#endif
c2 = Bytecoding_lcpchilddc_dc(&c1,nextl,lcpchilddc);
debug2(printf(" to %u, discrim chars %c and %c\n",nextl-1,c1,c2));
@@ -1772,10 +1841,10 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
UINT4 l, r;
#ifdef DEBUG1
- Univcoord_T SA_i;
+ Univcoord_T SA_i, hit, child_next;
int k = 0;
- UINT4 recount;
- char Buffer[1000];
+ UINT4 recount, lcp_prev, lcp_next, lcp_i, max_lcp;
+ char Buffer[1000+1], c1, c2;
bool failp;
#endif
@@ -1866,6 +1935,13 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
/*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
plusp,genestrand,first_read_p);
printf("%d\t%u\t%u\t",recount,(*initptr)-1,SA_i/*+ 1U*/);
+ c2 = Bytecoding_lcpchilddc_dc(&c1,(*initptr)-1,sarray->lcpchilddc);
+ printf("%c%c\t",c1,c2);
+ lcp_i = Bytecoding_lcpchilddc_lcp((*initptr)-1,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
+ printf("%u\t",lcp_i);
+ lcp_next = Bytecoding_lcpchilddc_lcp((*initptr),/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
+ printf("%u\t",Bytecoding_lcpchilddc_lcp_next(&child_next,(*initptr)-1,/*bytes*/sarray->lcpchilddc,sarray->child_guide,sarray->child_exceptions,
+ sarray->child_guide_interval,sarray->lcp_exceptions,sarray->n_lcp_exceptions));
if (genestrand == +2) {
if (plusp) {
Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
@@ -1891,23 +1967,42 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
/* Hits */
+ lcp_prev = lcp_i;
for (k = 0; k < (int) (*finalptr - *initptr + 1) && k < MAX_DEBUG1_HITS; k++) {
SA_i = csa_lookup(sarray,(*initptr)+k);
recount = Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
/*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
plusp,genestrand,first_read_p);
printf("%d\t%u\t%u\t",recount,(*initptr)+k,SA_i/*+ 1U*/);
+ c2 = Bytecoding_lcpchilddc_dc(&c1,(*initptr)+k,sarray->lcpchilddc);
+ printf("%c%c\t",c1,c2);
+ lcp_i = Bytecoding_lcpchilddc_lcp((*initptr)+k,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
+ lcp_next = Bytecoding_lcpchilddc_lcp((*initptr)+k+1,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
+ printf("%u\t",lcp_i);
+ printf("%u\t",Bytecoding_lcpchilddc_lcp_next(&child_next,(*initptr)+k,/*bytes*/sarray->lcpchilddc,sarray->child_guide,sarray->child_exceptions,
+ sarray->child_guide_interval,sarray->lcp_exceptions,sarray->n_lcp_exceptions));
+ max_lcp = lcp_i;
+ if (lcp_prev > max_lcp) {
+ max_lcp = lcp_prev;
+ }
+ if (lcp_next > max_lcp) {
+ max_lcp = lcp_next;
+ }
+ if (max_lcp > 1000) {
+ max_lcp = 1000;
+ }
+
if (genestrand == +2) {
if (plusp) {
- Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
+ Genome_fill_buffer_convert_rev(SA_i,max_lcp+1,Buffer);
} else {
- Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
+ Genome_fill_buffer_convert_fwd(SA_i,max_lcp+1,Buffer);
}
} else {
if (plusp) {
- Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
+ Genome_fill_buffer_convert_fwd(SA_i,max_lcp+1,Buffer);
} else {
- Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
+ Genome_fill_buffer_convert_rev(SA_i,max_lcp+1,Buffer);
}
}
printf("%s\n",Buffer);
@@ -1917,35 +2012,44 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
recount,csa_lookup(sarray,(*initptr)),*nmatches);
failp = true;
}
+
+ lcp_prev = lcp_i;
}
if (k < (int) (*finalptr - *initptr + 1)) {
/* Overflow */
printf("...\n");
k = (int) (*finalptr - *initptr);
- hit = sarray->array[(*initptr)+k];
+ hit = csa_lookup(sarray,(*initptr)+k);
recount = Genome_consecutive_matches_rightward(query_compress,/*left*/hit-queryoffset,
/*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
plusp,genestrand,first_read_p);
printf("%d\t%u\t%u\t",recount,(*initptr)+k,hit /*+ 1U*/);
+ c2 = Bytecoding_lcpchilddc_dc(&c1,(*initptr)+k,sarray->lcpchilddc);
+ printf("%c%c\t",c1,c2);
+ lcp_i = Bytecoding_lcpchilddc_lcp((*initptr)+k,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
+ lcp_next = Bytecoding_lcpchilddc_lcp((*initptr)+k+1,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
+ printf("%u\t",lcp_i);
+ printf("%u\t",Bytecoding_lcpchilddc_lcp_next(&child_next,(*initptr)+k,/*bytes*/sarray->lcpchilddc,sarray->child_guide,sarray->child_exceptions,
+ sarray->child_guide_interval,sarray->lcp_exceptions,sarray->n_lcp_exceptions));
if (genestrand == +2) {
if (plusp) {
- Genome_fill_buffer_convert_rev(sarray->array[(*initptr)+k],recount+1,Buffer);
+ Genome_fill_buffer_convert_rev(hit,recount+1,Buffer);
} else {
- Genome_fill_buffer_convert_fwd(sarray->array[(*initptr)+k],recount+1,Buffer);
+ Genome_fill_buffer_convert_fwd(hit,recount+1,Buffer);
}
} else {
if (plusp) {
- Genome_fill_buffer_convert_fwd(sarray->array[(*initptr)+k],recount+1,Buffer);
+ Genome_fill_buffer_convert_fwd(hit,recount+1,Buffer);
} else {
- Genome_fill_buffer_convert_rev(sarray->array[(*initptr)+k],recount+1,Buffer);
+ Genome_fill_buffer_convert_rev(hit,recount+1,Buffer);
}
}
printf("%s\n",Buffer);
if (recount != *nmatches) {
printf("querylength is %d\n",querylength);
printf("false positive: recount %d at %u does not equal expected nmatches %d\n",
- recount,sarray->array[(*initptr)],*nmatches);
+ recount,csa_lookup(sarray,*initptr),*nmatches);
failp = true;
}
/* hits[k] = sarray->array[(*initptr)++]; */
@@ -1959,6 +2063,11 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
/*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
plusp,genestrand,first_read_p);
printf("%d\t%u\t%u\t",recount,(*finalptr)+1,SA_i/*+ 1U*/);
+ c2 = Bytecoding_lcpchilddc_dc(&c1,(*finalptr)+1,sarray->lcpchilddc);
+ printf("%c%c\t",c1,c2);
+ printf("%u\t",Bytecoding_lcpchilddc_lcp((*finalptr)+1,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions));
+ printf("%u\t",Bytecoding_lcpchilddc_lcp_next(&child_next,(*finalptr)+1,/*bytes*/sarray->lcpchilddc,sarray->child_guide,sarray->child_exceptions,
+ sarray->child_guide_interval,sarray->lcp_exceptions,sarray->n_lcp_exceptions));
if (genestrand == +2) {
if (plusp) {
Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
@@ -2324,7 +2433,7 @@ fill_positions_std (int *npositions, Univcoord_T low_adj, Univcoord_T high_adj,
while (ptr <= finalptr) {
debug7a(printf("Std: Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
- if ((value = array[ptr++]) < low_adj) {
+ if ((value = CONVERT(array[ptr++])) < low_adj) {
/* Skip */
} else if (value > high_adj) {
/* Skip */
@@ -2350,7 +2459,7 @@ fill_positions_std (int *npositions, Univcoord_T low_adj, Univcoord_T high_adj,
ptr = lastptr; /* One past the last ptr with a result */
while (i < *npositions) {
- if ((value = array[--ptr]) < low_adj) {
+ if ((value = CONVERT(array[--ptr])) < low_adj) {
/* Skip */
} else if (value > high_adj) {
/* Skip */
@@ -2372,7 +2481,7 @@ fill_positions_std (int *npositions, Univcoord_T low_adj, Univcoord_T high_adj,
#ifdef HAVE_ALLOCA
-#if defined(HAVE_SSSE3) && defined(HAVE_SSE2)
+#if defined(HAVE_SSSE3) && defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
/* SSSE3 needed for _mm_shuffle_epi8 */
static void
fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T low, Univcoord_T high,
@@ -2461,7 +2570,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
n_prealign = this->finalptr - this->initptr + 1;
}
for (k = 0; k < n_prealign; k++) {
- debug7a(printf("Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
+ debug7a(printf("Looking at value %u, relative to low %u and high %u\n",CONVERT(array[ptr]),low_adj,high_adj));
if ((value = *array_ptr++) >= low_adj && value <= high_adj) {
*out++ = value - this->querystart;
}
@@ -2609,7 +2718,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
#else
-/* Missing SSSE3 or SSE2 */
+/* Bigendian or missing SSSE3 or SSE2 */
static void
fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T low, Univcoord_T high,
@@ -2623,7 +2732,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
Univcoord_T *array = sarray->array;
#endif
Univcoord_T *positions_temp;
-#ifdef HAVE_SSE2
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
#ifdef HAVE_64_BIT
UINT8 pointer;
#else
@@ -2660,7 +2769,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
this->npositions_allocated = this->npositions = 0;
ptr = this->initptr;
-#ifdef HAVE_SSE2
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
if (ptr + 3 > this->finalptr) { /* ptr + 4 > (this->finalptr + 1) */
/* Handle in normal manner */
debug7(printf("Small batch, because %u + 3 <= %u\n",ptr,this->finalptr));
@@ -2694,8 +2803,8 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
/* Initial part */
debug7(printf("Initial part:\n"));
for (k = 0; k < n_prealign; k++) {
- debug7a(printf("Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
- if ((value0 = array[ptr++]) < low_adj) {
+ debug7a(printf("Looking at value %u, relative to low %u and high %u\n",CONVERT(array[ptr]),low_adj,high_adj));
+ if ((value0 = CONVERT(array[ptr++])) < low_adj) {
/* Skip */
} else if (value0 > high_adj) {
/* Skip */
@@ -2730,7 +2839,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
ptr += 4;
} else {
#ifndef USE_CSA
- value3 = array[ptr++];
+ value3 = CONVERT(array[ptr++]);
#endif
if (value3 < low_adj) {
/* Skip */
@@ -2744,7 +2853,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
}
#ifndef USE_CSA
- value2 = array[ptr++];
+ value2 = CONVERT(array[ptr++]);
#endif
if (value2 < low_adj) {
/* Skip */
@@ -2758,7 +2867,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
}
#ifndef USE_CSA
- value1 = array[ptr++];
+ value1 = CONVERT(array[ptr++]);
#endif
if (value1 < low_adj) {
/* Skip */
@@ -2772,7 +2881,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
}
#ifndef USE_CSA
- value0 = array[ptr++];
+ value0 = CONVERT(array[ptr++]);
#endif
if (value0 < low_adj) {
/* Skip */
@@ -2882,7 +2991,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
Univcoord_T *array = sarray->array;
#endif
Univcoord_T *more_positions;
-#ifdef HAVE_SSE2
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
#ifdef HAVE_64_BIT
UINT8 pointer;
#else
@@ -2920,7 +3029,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
this->npositions_allocated = this->npositions = 0;
ptr = this->initptr;
-#ifdef HAVE_SSE2
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
if (ptr + 3 > this->finalptr) { /* ptr + 4 > (this->finalptr + 1) */
/* Handle in normal manner */
debug7(printf("Small batch, because %u + 3 <= %u\n",ptr,this->finalptr));
@@ -2958,8 +3067,8 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
/* Initial part */
debug7(printf("Initial part:\n"));
for (k = 0; k < n_prealign; k++) {
- debug7a(printf("Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
- if ((value0 = array[ptr++]) < low_adj) {
+ debug7a(printf("Looking at value %u, relative to low %u and high %u\n",CONVERT(array[ptr]),low_adj,high_adj));
+ if ((value0 = CONVERT(array[ptr++])) < low_adj) {
/* Skip */
} else if (value0 > high_adj) {
/* Skip */
@@ -2998,7 +3107,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
ptr += 4;
} else {
#ifndef USE_CSA
- value3 = array[ptr++];
+ value3 = CONVERT(array[ptr++]);
#endif
if (value3 < low_adj) {
/* Skip */
@@ -3016,7 +3125,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
}
#ifndef USE_CSA
- value2 = array[ptr++];
+ value2 = CONVERT(array[ptr++]);
#endif
if (value2 < low_adj) {
/* Skip */
@@ -3034,7 +3143,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
}
#ifndef USE_CSA
- value1 = array[ptr++];
+ value1 = CONVERT(array[ptr++]);
#endif
if (value1 < low_adj) {
/* Skip */
@@ -3052,7 +3161,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
}
#ifndef USE_CSA
- value0 = array[ptr++];
+ value0 = CONVERT(array[ptr++]);
#endif
if (value0 < low_adj) {
/* Skip */
@@ -3132,7 +3241,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
i = GUESS_ALLOCATION; /* Start count with the number stored */
ptr = lastptr; /* One past the last ptr with a result */
-#ifdef HAVE_SSE2
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
if (this->initptr + 4 < ptr) {
while (i < this->npositions) {
if ((value0 = csa_lookup(sarray,--ptr)) < low_adj) {
@@ -3157,7 +3266,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
/* Initial part */
while (i < this->npositions) {
- if ((value0 = array[--ptr]) < low_adj) {
+ if ((value0 = CONVERT(array[--ptr])) < low_adj) {
/* Skip */
} else if (value0 > high_adj) {
/* Skip */
@@ -3185,7 +3294,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
ptr -= 4;
} else {
#ifndef USE_CSA
- value0 = array[--ptr];
+ value0 = CONVERT(array[--ptr]);
#endif
if (value0 < low_adj) {
/* Skip */
@@ -3196,7 +3305,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
}
#ifndef USE_CSA
- value1 = array[--ptr];
+ value1 = CONVERT(array[--ptr]);
#endif
if (value1 < low_adj) {
/* Skip */
@@ -3207,7 +3316,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
}
#ifndef USE_CSA
- value2 = array[--ptr];
+ value2 = CONVERT(array[--ptr]);
#endif
if (value2 < low_adj) {
/* Skip */
@@ -3218,7 +3327,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T goal, Univcoord
}
#ifndef USE_CSA
- value3 = array[--ptr];
+ value3 = CONVERT(array[--ptr]);
#endif
if (value3 < low_adj) {
/* Skip */
@@ -3421,7 +3530,13 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
this->n_all_positions = 0;
} else {
this->all_positions = (Univcoord_T *) MALLOC(this->n_all_positions*sizeof(Univcoord_T));
+#ifdef WORDS_BIGENDIAN
+ for (i = 0; i < this->n_all_positions; i++) {
+ this->all_positions[i] = Bigendian_convert_uint(sarray->array[this->initptr+i]);
+ }
+#else
memcpy(this->all_positions,&(sarray->array[this->initptr]),this->n_all_positions*sizeof(Univcoord_T));
+#endif
qsort(this->all_positions,this->n_all_positions,sizeof(Univcoord_T),Univcoord_compare);
}
#endif
@@ -6078,6 +6193,8 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
*fillin_diagonals = (List_T) NULL;
middle_path = (List_T) NULL;
+#ifdef SUBDIVIDE_ENDS
+ /* Without SUBDIVIDE_ENDS, sub_diagonals is guaranteed to be NULL */
/* A4. Process oligoindex diagonals from right */
if (List_length(sub_diagonals) == 0) {
/* Skip */
@@ -6096,6 +6213,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
}
#endif
}
+#endif
if (right_indel_diagonal != NULL) {
debug13(printf("Pushing right indel diagonal onto middle: query %d..%d, diagonal %u\n",
@@ -6302,7 +6420,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
}
- sub_diagonals = (List_T) NULL;
+ sub_diagonals = (List_T) NULL;
#ifdef SUBDIVIDE_ENDS
/* Run oligoindex here to left of common_diagonal */
@@ -6358,6 +6476,8 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
}
+#ifdef SUBDIVIDE_ENDS
+ /* Without SUBDIVIDE_ENDS, sub_diagonals is guaranteed to be NULL */
/* C4. Process oligoindex diagonals from left */
if (List_length(sub_diagonals) == 0) {
/* Skip */
@@ -6376,6 +6496,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
}
#endif
}
+#endif
debug13(printf("***Exiting find_best_path\n"));
@@ -7985,7 +8106,8 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
all_left_diagonals_plus = (List_T *) MALLOC(nseeds_plus*sizeof(List_T));
fillin_diagonals_plus = (List_T *) CALLOC(nseeds_plus,sizeof(List_T));
- chrhigh = 0;
+ chrnum = 1;
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
for (i = 0; i < nseeds_plus; i++) {
left = best_plus_elt->positions[i];
if (left > chrhigh) {
@@ -8054,7 +8176,8 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
all_left_diagonals_minus = (List_T *) MALLOC(nseeds_minus*sizeof(List_T));
fillin_diagonals_minus = (List_T *) CALLOC(nseeds_minus,sizeof(List_T));
- chrhigh = 0;
+ chrnum = 1;
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
for (i = 0; i < nseeds_minus; i++) {
left = best_minus_elt->positions[i];
if (left > chrhigh) {
@@ -8102,7 +8225,8 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
/* *sarray_gmap = (List_T) NULL; */
- chrhigh = 0;
+ chrnum = 1;
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
for (i = 0; i < nseeds_plus; i++) {
if (1 /*|| scores_plus[i] > best_score - 20*/) {
diagonal = middle_diagonals_plus[i];
@@ -8183,7 +8307,8 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
}
}
- chrhigh = 0;
+ chrnum = 1;
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
for (i = 0; i < nseeds_minus; i++) {
if (1 /*|| scores_minus[i] > best_score - 20*/) {
diagonal = middle_diagonals_minus[i];
@@ -8268,7 +8393,8 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
#if 0
/* Salvage using gmap */
- chrhigh = 0;
+ chrnum = 1;
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
for (i = 0; i < nseeds_plus; i++) {
if (incomplete_result_p(middle_path_plus[i],querylength) == true) {
left = best_plus_elt->positions[i];
@@ -8284,7 +8410,8 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
}
}
- chrhigh = 0;
+ chrnum = 1;
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
for (i = 0; i < nseeds_minus; i++) {
if (incomplete_result_p(middle_path_minus[i],querylength) == true) {
left = best_minus_elt->positions[i];
diff --git a/src/sarray-write.c b/src/sarray-write.c
index f9e75dc..30418b9 100644
--- a/src/sarray-write.c
+++ b/src/sarray-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-write.c 167266 2015-06-11 00:07:57Z twu $";
+static char rcsid[] = "$Id: sarray-write.c 170326 2015-07-22 17:49:55Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -188,7 +188,11 @@ sarray_search_char (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, char desired_ch
if (low % 2 == 1 && high % 2 == 1) {
mid += 1;
}
+#ifdef WORDS_BIGENDIAN
+ pos = Bigendian_convert_uint(SA[mid]);
+#else
pos = SA[mid];
+#endif
c = Genome_get_char_lex(genomecomp,pos,n,chartable);
if (desired_char > c) {
low = mid + 1;
@@ -207,7 +211,11 @@ sarray_search_char (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, char desired_ch
if (low % 2 == 1 || high % 2 == 1) {
mid += 1;
}
+#ifdef WORDS_BIGENDIAN
+ pos = Bigendian_convert_uint(SA[mid]);
+#else
pos = SA[mid];
+#endif
c = Genome_get_char_lex(genomecomp,pos,n,chartable);
if (desired_char >= c) {
low = mid;
@@ -274,7 +282,11 @@ sarray_search_simple (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, char *query,
}
nmatches = 0;
+#ifdef WORDS_BIGENDIAN
+ pos = Bigendian_convert_uint(SA[mid]);
+#else
pos = SA[mid];
+#endif
while (nmatches < querylength && (c = Genome_get_char_lex(genomecomp,pos,n,chartable)) == query[nmatches]) {
nmatches++;
@@ -300,7 +312,11 @@ sarray_search_simple (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, char *query,
}
nmatches = 0;
+#ifdef WORDS_BIGENDIAN
+ pos = Bigendian_convert_uint(SA[mid]);
+#else
pos = SA[mid];
+#endif
while (nmatches < querylength && (c = Genome_get_char_lex(genomecomp,pos,n,chartable)) == query[nmatches]) {
nmatches++;
@@ -1032,7 +1048,11 @@ Sarray_compute_lcp (char *rankfile, char *permuted_sarray_file, char *sarrayfile
for (b = 0, i = ii; b < RW_BATCH; b++, i++) {
rank_i = read_buffer_1[b];
if (rank_i > 0) {
+#ifdef WORDS_BIGENDIAN
+ write_buffer[b] = Bigendian_convert_uint(SA[rank_i - 1]);
+#else
write_buffer[b] = SA[rank_i - 1];
+#endif
} else {
write_buffer[b] = 0; /* Will be ignored */
}
@@ -1042,7 +1062,11 @@ Sarray_compute_lcp (char *rankfile, char *permuted_sarray_file, char *sarrayfile
for (i = ii; i <= n; i++) { /* final partial batch */
FREAD_UINT(&rank_i,fp);
if (rank_i > 0) {
+#ifdef WORDS_BIGENDIAN
+ FWRITE_UINT(Bigendian_convert_uint(SA[rank_i - 1]),permsa_fp);
+#else
FWRITE_UINT(SA[rank_i - 1],permsa_fp);
+#endif
} else {
FWRITE_UINT(zero,permsa_fp); /* Will be ignored */
}
@@ -1288,8 +1312,13 @@ compute_plcp (UINT4 *plcp, UINT4 *SA, UINT4 n) {
}
}
+#if 0
/* This makes lcp[0] = -1, because lcp[0] = plcp[SA[0]] = plcp[n] = -1 */
plcp[n] = -1;
+#else
+ /* This makes lcp[0] = 0, because lcp[0] = plcp[SA[0]] = plcp[n] = 0 */
+ plcp[n] = 0;
+#endif
return;
}
@@ -1375,9 +1404,11 @@ get_all_children (bool *filledp, Sarrayptr_T *l, Sarrayptr_T *r, Sarrayptr_T i,
void
Sarray_write_plcp (char *plcpptrsfile, char *plcpcompfile, UINT4 *SA, UINT4 genomelength) {
UINT4 *plcp;
- UINT4 *ramp;
+ UINT4 *ramp, *p;
UINT4 n = genomelength, i;
+ UINT4 ii;
+ FILE *fp;
plcp = (UINT4 *) MALLOC((n+1)*sizeof(UINT4));
ramp = plcp;
@@ -1398,7 +1429,23 @@ Sarray_write_plcp (char *plcpptrsfile, char *plcpcompfile, UINT4 *SA, UINT4 geno
fprintf(stderr,"Writing permuted lcp file...");
/* Provide n to write values [0..n] */
+
+#if 0
+ /* Print plcp as an array */
+ fp = fopen("plcp","wb");
+ for (ii = 0; ii + RW_BATCH <= n; ii += RW_BATCH) {
+ p = (void *) &(ramp[ii]);
+ FWRITE_UINTS(p,RW_BATCH,fp);
+ }
+ if (ii <= n) {
+ p = (void *) &(ramp[ii]);
+ FWRITE_UINTS(p,n - ii + 1,fp);
+ }
+ fclose(fp);
+#else
Bitpack64_write_differential(plcpptrsfile,plcpcompfile,ramp,n);
+#endif
+
fprintf(stderr,"done\n");
FREE(plcp);
@@ -2244,10 +2291,18 @@ Sarray_array_uncompress (Genome_T genomecomp, char *sarrayfile, char *plcpptrsfi
printf("i\tSA\tLCP\n");
pos = start;
+#ifdef WORDS_BIGENDIAN
+ sa_i = Bigendian_convert_uint(SA[pos]);
+#else
sa_i = SA[pos];
+#endif
lcp_i = Bitpack64_read_one(sa_i,plcpptrs,plcpcomp) - sa_i;
+#ifdef WORDS_BIGENDIAN
+ sa_nexti = Bigendian_convert_uint(SA[pos+1]);
+#else
sa_nexti = SA[pos+1];
+#endif
lcp_nexti = Bitpack64_read_one(sa_nexti,plcpptrs,plcpcomp) - sa_nexti;
if (pos == 0) {
@@ -2261,7 +2316,11 @@ Sarray_array_uncompress (Genome_T genomecomp, char *sarrayfile, char *plcpptrsfi
sa_i = sa_nexti;
lcp_i = lcp_nexti;
+#ifdef WORDS_BIGENDIAN
+ sa_nexti = Bigendian_convert_uint(SA[pos+1]);
+#else
sa_nexti = SA[pos+1];
+#endif
lcp_nexti = Bitpack64_read_one(sa_nexti,plcpptrs,plcpcomp) - sa_nexti;
printf("%u\t%u\t%u\t",pos,sa_i,lcp_i);
@@ -2342,7 +2401,11 @@ Sarray_child_uncompress (Genome_T genomecomp, unsigned char *lcpchilddc, UINT4 *
pos = start;
for (pos = start; pos <= end; pos++) {
+#ifdef WORDS_BIGENDIAN
+ sa_i = Bigendian_convert_uint(SA[pos]);
+#else
sa_i = SA[pos];
+#endif
lcp_i = Bytecoding_lcpchilddc_lcp(pos,lcpchilddc,lcp_exceptions,n_lcp_exceptions); /* lcp(i,j) */
c2 = Bytecoding_lcpchilddc_dc(&c1,pos,lcpchilddc);
diff --git a/src/sequence.c b/src/sequence.c
index f049b51..beb2d8f 100644
--- a/src/sequence.c
+++ b/src/sequence.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sequence.c 166641 2015-05-29 21:13:04Z twu $";
+static char rcsid[] = "$Id: sequence.c 170023 2015-07-17 16:47:21Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -15,7 +15,7 @@ static char rcsid[] = "$Id: sequence.c 166641 2015-05-29 21:13:04Z twu $";
#include <stdlib.h>
#include <string.h>
#include <strings.h> /* For rindex */
-#include <ctype.h> /* For iscntrl and isspace */
+#include <ctype.h> /* For iscntrl, isspace, and toupper */
#ifdef HAVE_ZLIB
#include <zlib.h>
@@ -1741,6 +1741,44 @@ Sequence_stdout_raw (T this) {
return;
}
+/* Writes the sequence to stdout as one unbroken stream of A, C, G, or T
+   (case-insensitive), with every other character printed as X. */
+void
+Sequence_stdout_stream_chars (T this) {
+  int pos;
+
+  for (pos = 0; pos < this->fulllength; pos++) {
+    /* Cast to unsigned char: toupper on a negative char is undefined */
+    switch (toupper((unsigned char) this->contents[pos])) {
+    case 'A': putchar('A'); break;
+    case 'C': putchar('C'); break;
+    case 'G': putchar('G'); break;
+    case 'T': putchar('T'); break;
+    default: putchar('X');
+    }
+  }
+  return;
+}
+
+/* Writes the sequence to stdout as one unbroken stream of raw byte codes:
+   A=0, C=1, G=2, T=3 (case-insensitive), and 4 for anything else. */
+void
+Sequence_stdout_stream_ints (T this) {
+  int pos;
+
+  for (pos = 0; pos < this->fulllength; pos++) {
+    /* Cast to unsigned char: toupper on a negative char is undefined */
+    switch (toupper((unsigned char) this->contents[pos])) {
+    case 'A': putchar(0); break;
+    case 'C': putchar(1); break;
+    case 'G': putchar(2); break;
+    case 'T': putchar(3); break;
+    default: putchar(4);
+    }
+  }
+  return;
+}
+
T
Sequence_substring (T usersegment, unsigned int left, unsigned int length,
diff --git a/src/sequence.h b/src/sequence.h
index 9ba3fbc..bc9ec26 100644
--- a/src/sequence.h
+++ b/src/sequence.h
@@ -1,4 +1,4 @@
-/* $Id: sequence.h 157225 2015-01-22 18:47:23Z twu $ */
+/* $Id: sequence.h 170023 2015-07-17 16:47:21Z twu $ */
#ifndef SEQUENCE_INCLUDED
#define SEQUENCE_INCLUDED
#ifdef HAVE_CONFIG_H
@@ -125,6 +125,10 @@ Sequence_stdout_two (T ref, T alt, bool uppercasep, int wraplength);
extern void
Sequence_stdout_raw (T this);
+extern void
+Sequence_stdout_stream_chars (T this); /* Streams the sequence to stdout as A, C, G, or T per character, X otherwise */
+extern void
+Sequence_stdout_stream_ints (T this); /* Streams the sequence to stdout as raw bytes 0-3 for A, C, G, T, and 4 otherwise */
extern T
Sequence_substring (T usersegment, unsigned int left, unsigned int length,
diff --git a/src/snpindex.c b/src/snpindex.c
index bf07662..a54704b 100644
--- a/src/snpindex.c
+++ b/src/snpindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: snpindex.c 161940 2015-03-25 20:36:59Z twu $";
+static char rcsid[] = "$Id: snpindex.c 168395 2015-06-26 17:13:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -855,22 +855,28 @@ merge_positions8 (FILE *positions_high_fp, FILE *positions_low_fp,
UINT8 *ptr1 = start1, *ptr2 = start2;
char *nt;
#ifdef WORDS_BIGENDIAN
- UINT8 position2;
+ UINT8 position1, position2;
#endif
while (ptr1 < end1 && ptr2 < end2) {
#ifdef WORDS_BIGENDIAN
- abort();
+ position1 = Bigendian_convert_uint8(*ptr1);
position2 = Bigendian_convert_uint8(*ptr2);
- if (*ptr1 < position2) {
- FWRITE_UINT8(*ptr1,positions_fp);
+ if (position1 < position2) {
+ position8_high = position1 >> POSITIONS8_HIGH_SHIFT;
+ position8_low = position1 & POSITIONS8_LOW_MASK;
+ FWRITE_CHAR(position8_high,positions_high_fp);
+ FWRITE_UINT(position8_low,positions_low_fp);
ptr1++;
- } else if (position2 < *ptr1) {
- FWRITE_UINT8(position2,positions_fp);
+ } else if (position2 < position1) {
+ position8_high = position2 >> POSITIONS8_HIGH_SHIFT;
+ position8_low = position2 & POSITIONS8_LOW_MASK;
+ FWRITE_CHAR(position8_high,positions_high_fp);
+ FWRITE_UINT(position8_low,positions_low_fp);
ptr2++;
} else {
nt = shortoligo_nt(oligo,index1part);
- fprintf(stderr,"Problem: saw duplicate positions %u in oligo %s\n",*ptr1,nt);
+ fprintf(stderr,"Problem: saw duplicate positions %u in oligo %s\n",position1,nt);
FREE(nt);
abort();
/*
@@ -881,7 +887,6 @@ merge_positions8 (FILE *positions_high_fp, FILE *positions_low_fp,
}
#else
-
if (*ptr1 < *ptr2) {
position8_high = *ptr1 >> POSITIONS8_HIGH_SHIFT;
position8_low = *ptr1 & POSITIONS8_LOW_MASK;
@@ -909,27 +914,32 @@ merge_positions8 (FILE *positions_high_fp, FILE *positions_low_fp,
}
while (ptr1 < end1) {
+#ifdef WORDS_BIGENDIAN
+ position1 = Bigendian_convert_uint8(*ptr1);
+ position8_high = position1 >> POSITIONS8_HIGH_SHIFT;
+ position8_low = position1 & POSITIONS8_LOW_MASK;
+#else
position8_high = *ptr1 >> POSITIONS8_HIGH_SHIFT;
position8_low = *ptr1 & POSITIONS8_LOW_MASK;
+#endif
FWRITE_CHAR(position8_high,positions_high_fp);
FWRITE_UINT(position8_low,positions_low_fp);
ptr1++;
}
-#ifdef WORDS_BIGENDIAN
while (ptr2 < end2) {
- FWRITE_UINT8(Bigendian_convert_uint8(*ptr2),positions_fp);
- ptr2++;
- }
+#ifdef WORDS_BIGENDIAN
+ position2 = Bigendian_convert_uint8(*ptr2);
+ position8_high = position2 >> POSITIONS8_HIGH_SHIFT;
+ position8_low = position2 & POSITIONS8_LOW_MASK;
#else
- while (ptr2 < end2) {
position8_high = *ptr2 >> POSITIONS8_HIGH_SHIFT;
position8_low = *ptr2 & POSITIONS8_LOW_MASK;
+#endif
FWRITE_CHAR(position8_high,positions_high_fp);
FWRITE_UINT(position8_low,positions_low_fp);
ptr2++;
}
-#endif
return;
}
@@ -941,16 +951,17 @@ merge_positions4 (FILE *positions_fp, UINT4 *start1, UINT4 *end1,
UINT4 *ptr1 = start1, *ptr2 = start2;
char *nt;
#ifdef WORDS_BIGENDIAN
- UINT4 position2;
+ UINT4 position1, position2;
#endif
while (ptr1 < end1 && ptr2 < end2) {
#ifdef WORDS_BIGENDIAN
+ position1 = Bigendian_convert_uint(*ptr1);
position2 = Bigendian_convert_uint(*ptr2);
- if (*ptr1 < position2) {
- FWRITE_UINT(*ptr1,positions_fp);
+ if (position1 < position2) {
+ FWRITE_UINT(position1,positions_fp);
ptr1++;
- } else if (position2 < *ptr1) {
+ } else if (position2 < position1) {
FWRITE_UINT(position2,positions_fp);
ptr2++;
} else {
@@ -988,21 +999,24 @@ merge_positions4 (FILE *positions_fp, UINT4 *start1, UINT4 *end1,
}
while (ptr1 < end1) {
+#ifdef WORDS_BIGENDIAN
+ position1 = Bigendian_convert_uint(*ptr1);
+ FWRITE_UINT(position1,positions_fp);
+#else
FWRITE_UINT(*ptr1,positions_fp);
+#endif
ptr1++;
}
-#ifdef WORDS_BIGENDIAN
while (ptr2 < end2) {
- FWRITE_UINT(Bigendian_convert_uint(*ptr2),positions_fp);
- ptr2++;
- }
+#ifdef WORDS_BIGENDIAN
+ position2 = Bigendian_convert_uint(*ptr2);
+ FWRITE_UINT(position2,positions_fp);
#else
- while (ptr2 < end2) {
FWRITE_UINT(*ptr2,positions_fp);
+#endif
ptr2++;
}
-#endif
return;
}
diff --git a/src/splice.c b/src/splice.c
index 3e68052..5362f29 100644
--- a/src/splice.c
+++ b/src/splice.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: splice.c 166641 2015-05-29 21:13:04Z twu $";
+static char rcsid[] = "$Id: splice.c 167583 2015-06-15 18:12:14Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2347,11 +2347,11 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
} else {
/* Multiple hits */
donor_hits = acceptor_hits = (List_T) NULL;
- if (plusp == true) {
- for (p = accepted_hits; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
+ for (p = accepted_hits; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ donor = Stage3end_substring_donor(hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ if (Stage3end_plusp(hit) == true) {
if (Substring_genomicstart(donor) == segmenti_left) {
donor_hits = List_push(donor_hits,(void *) hit);
} else if (Substring_genomicstart(acceptor) == segmenti_left) {
@@ -2359,12 +2359,7 @@ group_by_segmenti_aux (int *found_score, List_T winners, List_T *ambiguous,
} else {
Stage3end_free(&hit);
}
- }
- } else {
- for (p = accepted_hits; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
+ } else {
if (Substring_genomicend(donor) == segmenti_left) {
donor_hits = List_push(donor_hits,(void *) hit);
} else if (Substring_genomicend(acceptor) == segmenti_left) {
@@ -2685,11 +2680,11 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
} else {
/* Multiple hits */
donor_hits = acceptor_hits = (List_T) NULL;
- if (plusp == true) {
- for (p = accepted_hits; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
+ for (p = accepted_hits; p != NULL; p = List_next(p)) {
+ hit = (Stage3end_T) List_head(p);
+ donor = Stage3end_substring_donor(hit);
+ acceptor = Stage3end_substring_acceptor(hit);
+ if (Stage3end_plusp(hit) == true) {
if (Substring_genomicstart(donor) == segmentj_left) {
donor_hits = List_push(donor_hits,(void *) hit);
} else if (Substring_genomicstart(acceptor) == segmentj_left) {
@@ -2698,12 +2693,7 @@ group_by_segmentj_aux (int *found_score, List_T winners, List_T *ambiguous,
abort();
Stage3end_free(&hit);
}
- }
- } else {
- for (p = accepted_hits; p != NULL; p = List_next(p)) {
- hit = (Stage3end_T) List_head(p);
- donor = Stage3end_substring_donor(hit);
- acceptor = Stage3end_substring_acceptor(hit);
+ } else {
if (Substring_genomicend(donor) == segmentj_left) {
donor_hits = List_push(donor_hits,(void *) hit);
} else if (Substring_genomicend(acceptor) == segmentj_left) {
diff --git a/src/stage1hr.c b/src/stage1hr.c
index a6f5fa4..67fb4db 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 167163 2015-06-09 20:54:02Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 170517 2015-07-23 23:15:28Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -189,7 +189,7 @@ static int index1part;
static int index1interval;
static int spansize;
static int two_index1intervals;
-static int min_readlength;
+static int min_kmer_readlength;
static Univ_IIT_T chromosome_iit;
static int circular_typeint;
@@ -1068,6 +1068,10 @@ read_oligos (bool *allvalidp, T this, char *queryuc_ptr, int querylength,
/* this->maxfloor = 1 + querylength/oligobase * 2; */
if (use_only_sarray_p == true) {
+ *allvalidp = false;
+ return 1;
+ } else if (use_sarray_p == true && querylength < min_kmer_readlength) {
+ *allvalidp = false;
return 1;
} else {
reader = Reader_new(queryuc_ptr,/*querystart*/0,/*queryend*/querylength);
@@ -1252,6 +1256,67 @@ read_oligos (bool *allvalidp, T this, char *queryuc_ptr, int querylength,
}
}
}
+
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ if (genestrand == +2) {
+ while ((last_state = Oligo_next(last_state,&querypos,&forward,&revcomp,
+ reader,/*cdnaend*/FIVE)) != DONE) {
+#ifdef LARGE_GENOMES
+ this->plus_positions_high[querypos] = (unsigned char *) NULL;
+ this->plus_positions_low[querypos] = (UINT4 *) NULL;
+ this->minus_positions_high[querypos] = (unsigned char *) NULL;
+ this->minus_positions_low[querypos] = (UINT4 *) NULL;
+#else
+ this->plus_positions[querypos] = (Univcoord_T *) NULL;
+ this->minus_positions[querypos] = (Univcoord_T *) NULL;
+#endif
+ this->plus_npositions[querypos] = 0;
+ this->minus_npositions[querypos] = 0;
+
+ if (last_state == VALID) {
+#ifdef USE_VALIDP
+ this->validp[querypos] = true;
+#endif
+ this->plus_retrievedp[querypos] = false;
+ this->minus_retrievedp[querypos] = false;
+
+ this->forward_oligos[querypos] = Atoi_reduce_ag(forward) & oligobase_mask;
+ this->revcomp_oligos[querypos] = Atoi_reduce_tc(revcomp >> leftreadshift) & oligobase_mask;
+
+ debug(printf("At querypos %d, read oligo = %06X\n",querypos,this->forward_oligos[querypos]));
+ noligos++;
+ }
+ }
+ } else {
+ while ((last_state = Oligo_next(last_state,&querypos,&forward,&revcomp,
+ reader,/*cdnaend*/FIVE)) != DONE) {
+#ifdef LARGE_GENOMES
+ this->plus_positions_high[querypos] = (unsigned char *) NULL;
+ this->plus_positions_low[querypos] = (UINT4 *) NULL;
+ this->minus_positions_high[querypos] = (unsigned char *) NULL;
+ this->minus_positions_low[querypos] = (UINT4 *) NULL;
+#else
+ this->plus_positions[querypos] = (Univcoord_T *) NULL;
+ this->minus_positions[querypos] = (Univcoord_T *) NULL;
+#endif
+ this->plus_npositions[querypos] = 0;
+ this->minus_npositions[querypos] = 0;
+
+ if (last_state == VALID) {
+#ifdef USE_VALIDP
+ this->validp[querypos] = true;
+#endif
+ this->plus_retrievedp[querypos] = false;
+ this->minus_retrievedp[querypos] = false;
+
+ this->forward_oligos[querypos] = Atoi_reduce_tc(forward) & oligobase_mask;
+ this->revcomp_oligos[querypos] = Atoi_reduce_ag(revcomp >> leftreadshift) & oligobase_mask;
+
+ debug(printf("At querypos %d, read oligo = %06X\n",querypos,this->forward_oligos[querypos]));
+ noligos++;
+ }
+ }
+ }
}
if (noligos < query_lastpos + 1) {
@@ -6493,7 +6558,6 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
double best_prob, prob, donor_prob, acceptor_prob;
Substring_T donor, acceptor;
- int sensedir;
#ifdef LARGE_GENOMES
Uint8list_T ambcoords;
#else
@@ -7220,7 +7284,6 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
double best_prob, prob, donor_prob, acceptor_prob;
Substring_T donor, acceptor;
- int sensedir;
#ifdef LARGE_GENOMES
Uint8list_T ambcoords;
#else
@@ -9395,7 +9458,6 @@ find_spliceends_distant_dna_plus (List_T **distant_startfrags, List_T **distant_
int *floors_from_neg3, *floors_to_pos3;
int splice_pos_start, splice_pos_end;
- int i;
#ifdef HAVE_ALLOCA
int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
@@ -9569,7 +9631,6 @@ find_spliceends_distant_dna_minus (List_T **distant_startfrags, List_T **distant
int *floors_from_neg3, *floors_to_pos3;
int splice_pos_start, splice_pos_end;
- int i;
#ifdef HAVE_ALLOCA
int *mismatch_positions = (int *) ALLOCA((querylength+1)*sizeof(int));
@@ -14076,6 +14137,7 @@ convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
#endif
+/* Segment chaining */
static List_T
convert_plus_segments_to_gmap (History_T gmap_history, List_T hits,
char *accession, char *queryuc_ptr, int querylength, int query_lastpos,
@@ -14704,6 +14766,7 @@ convert_plus_segments_to_gmap (History_T gmap_history, List_T hits,
}
+/* Segment chaining */
static List_T
convert_minus_segments_to_gmap (History_T gmap_history, List_T hits,
char *accession, char *queryuc_ptr, int querylength, int query_lastpos,
@@ -15336,11 +15399,10 @@ align_singleend_with_gmap (History_T gmap_history, List_T result, T this,
Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
int user_maxlevel, int cutoff_level, bool first_read_p) {
- List_T new_result = NULL, gmap_hits = NULL;
+ List_T new_result = NULL;
Stage3end_T hit, gmap;
- List_T p, a;
+ List_T p;
int genestrand;
- int missing_hit, missing_gmap;
int i;
@@ -15454,17 +15516,16 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
int user_maxlevel, int indel_penalty_middle, int indel_penalty_end,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
- bool allvalidp, bool keep_floors_p, int genestrand, bool first_read_p) {
- List_T hits, greedy = NULL, subs = NULL, terminals = NULL, indels = NULL, new_indels,
- ambiguous = NULL, singlesplicing = NULL, doublesplicing = NULL, shortendsplicing = NULL,
+ bool keep_floors_p, int genestrand, bool first_read_p) {
+ List_T hits, greedy = NULL, subs = NULL, terminals = NULL, indels = NULL,
+ singlesplicing = NULL, doublesplicing = NULL, shortendsplicing = NULL,
longsinglesplicing = NULL, distantsplicing = NULL, gmap_hits = NULL;
List_T plus_anchor_segments = NULL, minus_anchor_segments = NULL;
- List_T p, a;
+ List_T p;
Stage3end_T hit, gmap;
int nmisses_allowed_sarray;
- int found_score, done_level, opt_level, fast_level, mismatch_level, nmismatches, max_mismatches_allowed;
+ int found_score, done_level, opt_level, fast_level, mismatch_level, nmismatches;
int max_splice_mismatches, i;
- int missing_hit, missing_gmap;
int nhits = 0, nsplicepairs = 0;
List_T *startfrags_plus, *endfrags_plus, *startfrags_minus, *endfrags_minus;
List_T *donors_plus, *antidonors_plus, *acceptors_plus, *antiacceptors_plus,
@@ -15472,8 +15533,9 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
bool any_omitted_p, ambiguousp, alloc_floors_p = false, floors_computed_p = false;
Floors_T floors;
bool spanningsetp, completesetp, gmapp;
- bool segments_computed_p = false, gmap_better_p, extend_left_p, extend_right_p;
+ bool segments_computed_p = false;
Indexdb_T plus_indexdb, minus_indexdb;
+ bool allvalidp;
if (genestrand == +2) {
plus_indexdb = indexdb_rev;
@@ -15484,9 +15546,15 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
}
found_score = querylength;
- fast_level = (querylength + index1interval - 1)/spansize - NREQUIRED_FAST;
- debug(printf("fast_level %d = (querylength %d + index1interval %d - 1)/spansize %d - nrequired_fast %d\n",
- fast_level,querylength,index1interval,spansize,NREQUIRED_FAST));
+ if (querylength < min_kmer_readlength) {
+ fast_level = querylength - 1 - NREQUIRED_FAST;
+ debug(printf("fast_level %d = querylength %d - 1 - nrequired_fast %d\n",
+ fast_level,querylength,NREQUIRED_FAST));
+ } else {
+ fast_level = (querylength + index1interval - 1)/spansize - NREQUIRED_FAST;
+ debug(printf("fast_level %d = (querylength %d + index1interval %d - 1)/spansize %d - nrequired_fast %d\n",
+ fast_level,querylength,index1interval,spansize,NREQUIRED_FAST));
+ }
#if 0
/* This prevents complete_mm procedure, needed for short reads */
@@ -15528,7 +15596,7 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
nmisses_allowed_sarray = *cutoff_level;
#ifndef LARGE_GENOMES
- if (use_only_sarray_p == true) {
+ if (use_only_sarray_p == true || (use_sarray_p == true && querylength < min_kmer_readlength)) {
hits = Sarray_search_greedy(&(*cutoff_level),
queryuc_ptr,queryrc,querylength,query_compress_fwd,query_compress_rev,maxpeelback,pairpool,
dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
@@ -15572,6 +15640,9 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
}
#endif
+ if (querylength < min_kmer_readlength) {
+ spanningsetp = false;
+ }
/* Search 2: Exact/subs via spanning set */
@@ -15640,6 +15711,10 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
completesetp = false;
}
+ if (querylength < min_kmer_readlength) {
+ completesetp = false;
+ }
+
if (completesetp == true) {
if (this->read_oligos_p == false) {
read_oligos(&allvalidp,this,queryuc_ptr,querylength,query_lastpos,/*genestrand*/0,
@@ -16217,1158 +16292,1143 @@ align_end (int *cutoff_level, History_T gmap_history, T this,
gmap,Stage3end_nmatches_posttrim(gmap),missing_gmap,Stage3end_nmatches_posttrim(hit),missing_hit));
gmap_hits = List_push(gmap_hits,(void *) gmap);
Stage3end_set_improved_by_gmap(hit);
- }
- }
- }
- debug13(printf("Have %d GMAP hits\n",List_length(gmap_hits)));
-
- if (alloc_floors_p == true) {
- Floors_free(&floors);
- }
-
- /* Keep gmap_hits found in search 9 and 10 */
- if (gmap_hits != NULL) {
- hits = List_append(hits,gmap_hits);
- }
-
- if (gmap_improvement_p == false) {
- debug(printf("No GMAP improvement: Before remove_overlaps at cutoff level %d: %d\n",*cutoff_level,List_length(hits)));
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/true,/*finalp*/true);
- hits = Stage3end_reject_trimlengths(hits);
- hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/false,/*finalp*/true);
- hits = Stage3end_resolve_multimapping(hits);
- debug(printf("After remove_overlaps: %d\n",List_length(hits)));
-
- } else {
- debug(printf("GMAP improvement: Before remove_overlaps at cutoff level %d: %d\n",*cutoff_level,List_length(hits)));
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/true,/*finalp*/false);
- /* Don't reject based on trimlength until after GMAP improvements */
- hits = Stage3end_remove_overlaps(hits,/*finalp*/false);
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/false,/*finalp*/false);
- hits = Stage3end_resolve_multimapping(hits);
- debug(printf("After remove_overlaps: %d\n",List_length(hits)));
-
- hits = align_singleend_with_gmap(gmap_history,hits,this,query_compress_fwd,query_compress_rev,
- accession,queryuc_ptr,querylength,query_lastpos,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel,*cutoff_level,
- first_read_p);
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/true,/*finalp*/true);
- hits = Stage3end_reject_trimlengths(hits);
- hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
- hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/false,/*finalp*/true);
- hits = Stage3end_resolve_multimapping(hits);
- }
-
- hits = Stage3end_remove_circular_alias(hits);
- hits = Stage3end_remove_duplicates(hits); /* Aliases can cause duplicates */
-
- List_free(&plus_anchor_segments);
- List_free(&minus_anchor_segments);
-
- return hits;
- }
-
-
- static Stage3end_T *
- single_read (int *npaths, int *first_absmq, int *second_absmq,
- Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
- int indexdb_size_threshold, Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
- bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
- int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- bool keep_floors_p) {
- Stage3end_T *stage3array;
- History_T gmap_history;
- List_T hits = NULL;
- T this = NULL;
- int user_maxlevel;
- int querylength, query_lastpos, cutoff_level;
- char *queryuc_ptr, *quality_string;
- Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
- bool allvalidp;
-
- #ifdef HAVE_ALLOCA
- char *queryrc;
- #else
- char queryrc[MAX_READLENGTH+1];
- #endif
-
- if ((querylength = Shortread_fulllength(queryseq)) < min_readlength) {
- fprintf(stderr,"Read %s has length %d < min_readlength %d. Skipping.\n",
- Shortread_accession(queryseq),querylength,min_readlength);
- /* fprintf(stderr,"You may want to build a genomic index with a smaller k-mer value using the -k flag to gmap_build\n"); */
- *npaths = 0;
- return (Stage3end_T *) NULL;
-
- #ifndef HAVE_ALLOCA
- } else if (querylength > MAX_READLENGTH) {
- fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
- Shortread_accession(queryseq),querylength,MAX_READLENGTH);
- *npaths = 0;
- return (Stage3end_T *) NULL;
- #endif
-
- } else {
- if (user_maxlevel_float < 0.0) {
- user_maxlevel = -1;
- } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
- user_maxlevel = (int) rint(user_maxlevel_float * (double) querylength);
- } else {
- user_maxlevel = (int) user_maxlevel_float;
- }
-
- /* Limit search on repetitive sequences */
- queryuc_ptr = Shortread_fullpointer_uc(queryseq);
- quality_string = Shortread_quality_string(queryseq);
- if (check_dinucleotides(queryuc_ptr,querylength) == false) {
- user_maxlevel = 0;
- }
-
- query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength);
- query_compress_rev = Compress_new_rev(queryuc_ptr,querylength);
- #ifdef HAVE_ALLOCA
- queryrc = (char *) ALLOCA((querylength+1)*sizeof(int));
- #endif
- make_complement_buffered(queryrc,queryuc_ptr,querylength);
-
- this = Stage1_new(querylength);
- query_lastpos = querylength - index1part;
-
- gmap_history = History_new();
- hits = align_end(&cutoff_level,gmap_history,this,
- query_compress_fwd,query_compress_rev,
- Shortread_accession(queryseq),queryuc_ptr,queryrc,querylength,query_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp,keep_floors_p,/*genestrand*/0,/*first_read_p*/true);
-
- if ((*npaths = List_length(hits)) == 0) {
- stage3array = (Stage3end_T *) NULL;
- } else {
- stage3array = (Stage3end_T *) List_to_array_out(hits,NULL); List_free(&hits); /* Return value */
- stage3array = Stage3end_eval_and_sort(&(*npaths),&(*first_absmq),&(*second_absmq),
- stage3array,maxpaths_search,queryseq,queryuc_ptr,queryrc,
- query_compress_fwd,query_compress_rev,
- quality_string,/*displayp*/true);
- }
-
- History_free(&gmap_history);
- Compress_free(&query_compress_fwd);
- Compress_free(&query_compress_rev);
- Stage1_free(&this,querylength);
- return stage3array;
- }
- }
-
-
- static Stage3end_T *
- single_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_absmq,
- Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
- int indexdb_size_threshold, Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
- bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
- int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- bool keep_floors_p) {
- Stage3end_T *stage3array;
- History_T gmap_history;
- List_T hits, hits_geneplus = NULL, hits_geneminus = NULL;
- T this_geneplus = NULL, this_geneminus = NULL;
- int user_maxlevel;
- int querylength, query_lastpos, cutoff_level;
- char *queryuc_ptr, *quality_string;
- Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
- bool allvalidp;
-
- #ifdef HAVE_ALLOCA
- char *queryrc;
- #else
- char queryrc[MAX_READLENGTH+1];
- #endif
-
-
- if ((querylength = Shortread_fulllength(queryseq)) < min_readlength) {
- fprintf(stderr,"Read %s has length %d < min_readlength %d. Skipping\n",
- Shortread_accession(queryseq),querylength,min_readlength);
- /* fprintf(stderr,"You may want to build a genomic index with a smaller k-mer value using the -k flag to gmap_build\n"); */
- *npaths = 0;
- return (Stage3end_T *) NULL;
-
- #ifndef HAVE_ALLOCA
- } else if (querylength > MAX_READLENGTH) {
- fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
- Shortread_accession(queryseq),querylength,MAX_READLENGTH);
- *npaths = 0;
- return (Stage3end_T *) NULL;
- #endif
-
- } else {
- if (user_maxlevel_float < 0.0) {
- user_maxlevel = -1;
- } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
- user_maxlevel = (int) rint(user_maxlevel_float * (double) querylength);
- } else {
- user_maxlevel = (int) user_maxlevel_float;
- }
-
- this_geneplus = Stage1_new(querylength);
- this_geneminus = Stage1_new(querylength);
-
- queryuc_ptr = Shortread_fullpointer_uc(queryseq);
- quality_string = Shortread_quality_string(queryseq);
- query_lastpos = querylength - index1part;
-
- /* Limit search on repetitive sequences */
- if (check_dinucleotides(queryuc_ptr,querylength) == false) {
- user_maxlevel = 0;
- }
-
- query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength);
- query_compress_rev = Compress_new_rev(queryuc_ptr,querylength);
- gmap_history = History_new();
- #ifdef HAVE_ALLOCA
- queryrc = (char *) ALLOCA((querylength+1)*sizeof(char));
- #endif
- make_complement_buffered(queryrc,queryuc_ptr,querylength);
-
- if (read_oligos(&allvalidp,this_geneplus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+1,
- /*first_read_p*/true) > 0) {
- hits_geneplus = align_end(&cutoff_level,gmap_history,this_geneplus,
- query_compress_fwd,query_compress_rev,
- Shortread_accession(queryseq),queryuc_ptr,queryrc,querylength,query_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp,keep_floors_p,/*genestrand*/+1,/*first_read_p*/true);
- }
-
- if (read_oligos(&allvalidp,this_geneminus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+2,
- /*first_read_p*/true) > 0) {
- hits_geneminus = align_end(&cutoff_level,gmap_history,this_geneminus,
- query_compress_fwd,query_compress_rev,
- Shortread_accession(queryseq),queryuc_ptr,queryrc,querylength,query_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp,keep_floors_p,/*genestrand*/+2,/*first_read_p*/true);
- }
-
- hits = List_append(hits_geneplus,hits_geneminus);
- hits = Stage3end_optimal_score(hits,cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/true,/*finalp*/true);
- hits = Stage3end_reject_trimlengths(hits);
- hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
- hits = Stage3end_optimal_score(hits,cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
- querylength,/*keep_gmap_p*/false,/*finalp*/true);
- hits = Stage3end_resolve_multimapping(hits);
-
- if ((*npaths = List_length(hits)) == 0) {
- stage3array = (Stage3end_T *) NULL;
- } else {
- stage3array = (Stage3end_T *) List_to_array_out(hits,NULL); List_free(&hits); /* Return value */
- stage3array = Stage3end_eval_and_sort(&(*npaths),&(*first_absmq),&(*second_absmq),
- stage3array,maxpaths_search,queryseq,queryuc_ptr,queryrc,
- query_compress_fwd,query_compress_rev,
- quality_string,/*displayp*/true);
- }
-
- History_free(&gmap_history);
- Compress_free(&query_compress_fwd);
- Compress_free(&query_compress_rev);
- Stage1_free(&this_geneminus,querylength);
- Stage1_free(&this_geneplus,querylength);
- return stage3array;
- }
- }
-
-
- Stage3end_T *
- Stage1_single_read (int *npaths, int *first_absmq, int *second_absmq,
- Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
- int indexdb_size_threshold, Floors_T *floors_array,
- double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
- bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
- int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- bool keep_floors_p) {
-
- if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED) {
- return single_read(&(*npaths),&(*first_absmq),&(*second_absmq),
- queryseq,indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,user_maxlevel_float,
- indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p);
- } else if (mode == CMET_NONSTRANDED || mode == ATOI_NONSTRANDED) {
- return single_read_tolerant_nonstranded(&(*npaths),&(*first_absmq),&(*second_absmq),queryseq,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,user_maxlevel_float,
- indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p);
- } else {
- fprintf(stderr,"Do not recognize mode %d\n",mode);
- abort();
- }
- }
-
-
-
- /* #define HITARRAY_SHORTENDSPLICING 4 */
- /* #define HITARRAY_DISTANTSPLICING 4 */
-
-
- static List_T
- align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3,
- Shortread_T queryseq5, Shortread_T queryseq3,
- char *queryuc_ptr, int querylength, int query_lastpos,
- #ifdef END_KNOWNSPLICING_SHORTCUT
- char *queryrc, bool invertedp,
- #endif
- Compress_T query_compress_fwd, Compress_T query_compress_rev,
- struct Segment_T *plus_segments, int plus_nsegments,
- struct Segment_T *minus_segments, int minus_nsegments,
- Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
- Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
- Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
- Chrpos_T pairmax, Chrpos_T shortsplicedist, int user_maxlevel,
- int genestrand, bool first_read_p) {
- List_T hits = NULL;
- int sensedir, sense_try;
- int overlap;
-
- int zero_offset = 0;
- Univcoord_T segmentstart, segmentend;
- Univcoord_T genomicbound, genomicbound2, mappingstart, mappingend,
- chroffset, chrhigh, mappingpos;
- #ifdef USE_GREEDY
- Univcoord_T close_mappingstart_greedy, close_mappingend_greedy,
- middle_mappingstart_greedy, middle_mappingend_greedy;
- #endif
- Univcoord_T close_mappingstart_last, close_mappingend_last,
- middle_mappingstart_last, middle_mappingend_last;
- Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
- Univcoord_T close_knownsplice_limit_low, close_knownsplice_limit_high;
- Chrpos_T chrlength;
- Chrnum_T chrnum;
- bool close_mappingstart_p = false, close_mappingend_p = false;
- bool middle_mappingstart_p = false, middle_mappingend_p = false;
- bool fallback_mappingstart_p, fallback_mappingend_p;
- bool good_start_p, good_end_p, watsonp, favor_right_p;
-
- int starti, endi, i;
-
- if (hit3 == NULL) {
- /* Both events are tested by Stage3end_anomalous_splice_p */
- if ((chrnum = Stage3end_chrnum(hit5)) == 0) {
- /* Translocation */
- return (List_T) NULL;
-
- } else if (Stage3end_hittype(hit5) == SAMECHR_SPLICE) {
- /* A genomic event that doesn't get reflected in chrnum */
- return (List_T) NULL;
-
- } else if ((watsonp = Stage3end_plusp(hit5)) == true) {
- chroffset = Stage3end_chroffset(hit5);
- chrhigh = Stage3end_chrhigh(hit5);
- chrlength = Stage3end_chrlength(hit5);
-
- if (Shortread_find_primers(queryseq5,queryseq3) == true) {
- /* Go from genomicstart */
- debug13(printf("Found primers\n"));
- genomicbound = Stage3end_genomicstart(hit5);
-
- } else if (Stage3end_anomalous_splice_p(hit5) == true) {
- /* Go from genomicstart */
- debug13(printf("Anomalous splice\n"));
- genomicbound = Stage3end_genomicstart(hit5);
-
- } else {
- genomicbound = Stage3end_genomicend(hit5);
-
- #if 0
- /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
- if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
- Stage3end_genomicbound_from_end(&genomicbound2,hit5,overlap,chroffset) == true) {
- debug13(printf("Found overlap of %d\n",overlap));
- if (genomicbound2 < genomicbound) {
- zero_offset = genomicbound - genomicbound2;
- genomicbound = genomicbound2;
- }
- }
- #endif
- }
-
- debug13(printf("Case 1: hit5 plus %s %u..%u (sensedir %d) => genomicbound %u\n",
- Stage3end_hittype_string(hit5),
- Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset,
- Stage3end_sensedir(hit5),genomicbound - chroffset));
-
- knownsplice_limit_low = mappingstart = segmentstart = genomicbound;
- knownsplice_limit_high = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chrhigh);
- segmentend = add_bounded(Stage3end_genomicend(hit5),pairmax,chrhigh);
- #ifdef LONG_ENDSPLICES
- mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chrhigh);
- #else
- mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist_novelend,chrhigh);
- #endif
- debug13(printf("Original bounds E: knownsplice_limit_low %u, knownsplice_limit_high %u, mappingend %u\n",
- knownsplice_limit_low - chroffset,knownsplice_limit_high - chroffset,mappingend - chroffset));
-
- close_mappingend_last = middle_mappingend_last = Stage3end_genomicend(hit5);
- #ifdef USE_GREEDY
- close_mappingend_greedy = middle_mappingend_greedy = segmentend;
- #endif
-
- if (plus_nsegments > 0) {
- /* Use segments to bound */
- debug13(printf("Finding segments from segmentstart %u to segmentend %u (plus_nsegments %d)\n",
- segmentstart - chroffset,segmentend - chroffset,plus_nsegments));
- starti = endi = -1;
- i = binary_search_segments(0,plus_nsegments-1,plus_segments,segmentstart);
- while (i < plus_nsegments - 1 && plus_segments[i].diagonal == (Univcoord_T) -1) {
- i++;
- }
- starti = i;
- while (plus_segments[i].diagonal < segmentend) {
- endi = i;
- i++;
- }
- if (starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti <= endi);
- for (i = starti; i <= endi; i++) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
- plus_segments[i].querypos5,plus_segments[i].querypos3));
- if (query_lastpos - plus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 1. Missing end of query, so there could be a middle splice */
- debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- query_lastpos,plus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- #ifdef USE_GREEDY
- if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
- mappingpos > genomicbound) {
- middle_mappingend_greedy = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend greedy to %u\n",middle_mappingend_greedy - chroffset));
- }
- #endif
-
- #ifdef LONG_ENDSPLICES
- if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
- }
- #else
- if ((mappingpos = plus_segments[i].diagonal) > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
- }
- #endif
-
- } else {
- debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
- query_lastpos,plus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- #ifdef USE_GREEDY
- if ((mappingpos = plus_segments[i].diagonal) < close_mappingend_greedy &&
- mappingpos > genomicbound) {
- close_mappingend_greedy = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend greedy to %u\n",close_mappingend_greedy - chroffset));
- }
- #endif
- if ((mappingpos = plus_segments[i].diagonal) > close_mappingend_last) {
- /* Use > for NOT_GREEDY */
- close_mappingend_last = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
- }
- }
- }
+ }
+ }
+ }
+ debug13(printf("Have %d GMAP hits\n",List_length(gmap_hits)));
- #ifdef USE_GREEDY
- if (close_mappingend_p == true) {
- close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
- } else if (middle_mappingend_p == true) {
- debug13(printf("Using middle mappingend\n"));
- close_knownsplice_limit_high = middle_mappingend_greedy;
- close_mappingend_greedy = middle_mappingend_greedy;
- close_mappingend_p = true;
- }
- #else
- if (close_mappingend_p == true) {
- close_knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
- } else if (middle_mappingend_p == true) {
- debug13(printf("Using middle mappingend\n"));
- close_knownsplice_limit_high = middle_mappingend_last;
- close_mappingend_last = middle_mappingend_last;
- close_mappingend_p = true;
- }
- #endif
- #ifdef USE_GREEDY
- if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_greedy) {
- knownsplice_limit_high = middle_mappingend_last;
- mappingend = middle_mappingend_last;
- } else if (close_mappingend_p == true && close_mappingend_last != close_mappingend_greedy) {
- knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
- mappingend = close_mappingend_last;
- }
- #else
- if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_last) {
- knownsplice_limit_high = middle_mappingend_last;
- mappingend = middle_mappingend_last;
- }
- #endif
-
- if (close_mappingend_p == false) {
- fallback_mappingend_p = false;
- #ifdef USE_GREEDY
- } else if (mappingend <= close_mappingend_greedy) {
- fallback_mappingend_p = false;
- #endif
- } else {
- debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
- fallback_mappingend_p = true;
- }
- }
- }
-
- favor_right_p = false;
-
- } else {
- chroffset = Stage3end_chroffset(hit5);
- chrhigh = Stage3end_chrhigh(hit5);
- chrlength = Stage3end_chrlength(hit5);
-
- if (Shortread_find_primers(queryseq5,queryseq3) == true) {
- /* Go from genomicstart */
- debug13(printf("Found primers\n"));
- genomicbound = Stage3end_genomicstart(hit5);
-
- } else if (Stage3end_anomalous_splice_p(hit5) == true) {
- /* Go from genomicstart */
- debug13(printf("Anomalous splice\n"));
- genomicbound = Stage3end_genomicstart(hit5);
-
- } else {
- genomicbound = Stage3end_genomicend(hit5);
-
- #if 0
- /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
- if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
- Stage3end_genomicbound_from_end(&genomicbound2,hit5,overlap,chroffset) == true) {
- debug13(printf("Found overlap of %d\n",overlap));
- if (genomicbound2 > genomicbound) {
- zero_offset = genomicbound2 - genomicbound;
- genomicbound = genomicbound2;
- }
- }
- #endif
- }
-
- debug13(printf("Case 2: hit5 minus %s %u..%u (sensedir %d) => genomicbound %u\n",
- Stage3end_hittype_string(hit5),
- Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset,
- Stage3end_sensedir(hit5),genomicbound - chroffset));
-
- knownsplice_limit_high = mappingend = segmentend = genomicbound;
- knownsplice_limit_low = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chroffset);
- segmentstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax,chroffset);
- #ifdef LONG_ENDSPLICES
- mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chroffset);
- #else
- mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist_novelend,chroffset);
- #endif
- debug13(printf("Original bounds F: knownsplice_limit_low %u, knownsplice_limit_high %u, mappingstart %u\n",
- knownsplice_limit_low - chroffset,knownsplice_limit_high - chroffset,mappingstart - chroffset));
-
- close_mappingstart_last = middle_mappingstart_last = Stage3end_genomicend(hit5);
- #ifdef USE_GREEDY
- close_mappingstart_greedy = middle_mappingstart_greedy = segmentstart;
- #endif
-
- if (minus_nsegments > 0) {
- /* Use segments to bound */
- debug13(printf("Finding segments from segmentstart %u to segmentend %u (minus_nsegments %d)\n",
- segmentstart - chroffset,segmentend - chroffset,minus_nsegments));
- starti = endi = -1;
- i = binary_search_segments(0,minus_nsegments-1,minus_segments,segmentend);
- while (i >= 0 && minus_segments[i].diagonal >= segmentend) {
- i--;
- }
- starti = i;
- while (i >= 0 && minus_segments[i].diagonal > segmentstart) {
- if (minus_segments[i].diagonal < (Univcoord_T) -1) {
- endi = i;
- }
- i--;
- }
- if (starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti >= endi);
- for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
- minus_segments[i].querypos5,minus_segments[i].querypos3));
- if (query_lastpos - minus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 2. Missing end of query, so there could be a middle splice */
- debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- query_lastpos,minus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- #ifdef USE_GREEDY
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
- mappingpos < genomicbound) {
- middle_mappingstart_greedy = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart greedy to %u\n",middle_mappingstart_greedy - chroffset));
- }
- #endif
- #ifdef LONG_ENDSPLICES
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
- #else
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength,chroffset)) < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
- #endif
-
- } else {
- debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
- query_lastpos,minus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
- #ifdef USE_GREEDY
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength,chroffset)) > close_mappingstart_greedy &&
- mappingpos < genomicbound) {
- close_mappingstart_greedy = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart greedy to %u\n",close_mappingstart_greedy - chroffset));
- }
- #endif
- if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength,chroffset)) < close_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- close_mappingstart_last = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
- }
- }
- }
+ if (alloc_floors_p == true) {
+ Floors_free(&floors);
+ }
- #ifdef USE_GREEDY
- if (close_mappingstart_p == true) {
- close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
- } else if (middle_mappingstart_p == true) {
- debug13(printf("Using middle mappingstart\n"));
- close_knownsplice_limit_low = middle_mappingstart_greedy;
- close_mappingstart_greedy = middle_mappingstart_greedy;
- close_mappingstart_p = true;
- }
- #else
- if (close_mappingstart_p == true) {
- close_knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
- } else if (middle_mappingstart_p == true) {
- debug13(printf("Using middle mappingstart\n"));
- close_knownsplice_limit_low = middle_mappingstart_last;
- close_mappingstart_last = middle_mappingstart_last;
- close_mappingstart_p = true;
- }
- #endif
- #ifdef USE_GREEDY
- if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_greedy) {
- knownsplice_limit_low = middle_mappingstart_last;
- mappingstart = middle_mappingstart_last;
- } else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
- knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
- mappingstart = close_mappingstart_last;
- }
- #else
- if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_last) {
- knownsplice_limit_low = middle_mappingstart_last;
- mappingstart = middle_mappingstart_last;
- }
- #endif
- if (close_mappingstart_p == false) {
- fallback_mappingstart_p = false;
- #ifdef USE_GREEDY
- } else if (mappingstart >= close_mappingstart_greedy) {
- fallback_mappingstart_p = false;
- #endif
- } else {
- debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
- fallback_mappingstart_p = true;
- }
- }
- }
-
- favor_right_p = false;
- }
-
- if ((sensedir = Stage3end_sensedir(hit5)) == SENSE_FORWARD) {
- sense_try = +1;
- } else if (sensedir == SENSE_ANTI) {
- sense_try = -1;
- } else {
- sense_try = 0;
- }
-
- } else if (hit5 == NULL) {
- /* Both events are tested by Stage3end_anomalous_splice_p */
- if ((chrnum = Stage3end_chrnum(hit3)) == 0) {
- /* Translocation */
- return (List_T) NULL;
-
- } else if (Stage3end_hittype(hit3) == SAMECHR_SPLICE) {
- /* A genomic event that doesn't get reflected in chrnum */
- return (List_T) NULL;
-
- } else if ((watsonp = Stage3end_plusp(hit3)) == true) {
- chroffset = Stage3end_chroffset(hit3);
- chrhigh = Stage3end_chrhigh(hit3);
- chrlength = Stage3end_chrlength(hit3);
-
- if (Shortread_find_primers(queryseq5,queryseq3) == true) {
- /* Go from genomicend */
- debug13(printf("Found primers\n"));
- genomicbound = Stage3end_genomicend(hit3);
-
- } else if (Stage3end_anomalous_splice_p(hit3) == true) {
- /* Go from genomicend */
- debug13(printf("Anomalous splice\n"));
- genomicbound = Stage3end_genomicend(hit3);
-
- } else {
- genomicbound = Stage3end_genomicstart(hit3);
-
- #if 0
- /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
- if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
- Stage3end_genomicbound_from_start(&genomicbound2,hit3,overlap,chroffset) == true) {
- debug13(printf("Found overlap of %d\n",overlap));
- if (genomicbound2 > genomicbound) {
- zero_offset = genomicbound2 - genomicbound;
- genomicbound = genomicbound2;
- }
- }
- #endif
- }
-
- debug13(printf("Case 3: hit3 plus %s %u..%u (sensedir %d) => genomicbound %u\n",
- Stage3end_hittype_string(hit3),
- Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset,
- Stage3end_sensedir(hit3),genomicbound - chroffset));
-
- knownsplice_limit_high = mappingend = segmentend = genomicbound;
- knownsplice_limit_low = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chroffset);
- segmentstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax,chroffset);
- #ifdef LONG_ENDSPLICES
- mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chroffset);
- #else
- mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist_novelend,chroffset);
- #endif
-
- close_mappingstart_last = middle_mappingstart_last = Stage3end_genomicstart(hit3);
- #ifdef USE_GREEDY
- close_mappingstart_greedy = middle_mappingstart_greedy = segmentstart;
- #endif
-
- if (plus_nsegments > 0) {
- /* Use segments to bound */
- debug13(printf("Finding segments from segmentstart %u to segmentend %u (plus_nsegments %d)\n",
- segmentstart - chroffset,segmentend - chroffset,plus_nsegments));
- starti = endi = -1;
- i = binary_search_segments(0,plus_nsegments-1,plus_segments,segmentend);
- while (i >= 0 && plus_segments[i].diagonal >= segmentend) {
- i--;
- }
- starti = i;
- while (i >= 0 && plus_segments[i].diagonal > segmentstart) {
- if (plus_segments[i].diagonal < (Univcoord_T) -1) {
- endi = i;
- }
- i--;
- }
- if (starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti >= endi);
- for (i = starti; i >= endi; i--) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
- plus_segments[i].querypos5,plus_segments[i].querypos3));
- if (plus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 3. Missing start of query, so there could be a middle splice */
- debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- plus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- #ifdef USE_GREEDY
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
- mappingpos < genomicbound) {
- middle_mappingstart_greedy = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart greedy to %u\n",middle_mappingstart_greedy - chroffset));
- }
- #endif
- #ifdef LONG_ENDSPLICES
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
- #else
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength,chroffset)) < middle_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- middle_mappingstart_last = mappingpos;
- middle_mappingstart_p = true;
- debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
- }
- #endif
-
- } else {
- debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
- plus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- #ifdef USE_GREEDY
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength,chroffset)) > close_mappingstart_greedy &&
- mappingpos < genomicbound) {
- close_mappingstart_greedy = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart greedy to %u\n",close_mappingstart_greedy - chroffset));
- }
- #endif
- if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength,chroffset)) < close_mappingstart_last) {
- /* Use < for NOT_GREEDY */
- close_mappingstart_last = mappingpos;
- close_mappingstart_p = true;
- debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
- }
- }
- }
+ /* Keep gmap_hits found in search 9 and 10 */
+ if (gmap_hits != NULL) {
+ hits = List_append(hits,gmap_hits);
+ }
- #ifdef USE_GREEDY
- if (close_mappingstart_p == true) {
- close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
- } else if (middle_mappingstart_p == true) {
- debug13(printf("Using middle mappingstart\n"));
- close_knownsplice_limit_low = middle_mappingstart_greedy;
- close_mappingstart_greedy = middle_mappingstart_greedy;
- close_mappingstart_p = true;
- }
- #else
- if (close_mappingstart_p == true) {
- close_knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
- } else if (middle_mappingstart_p == true) {
- debug13(printf("Using middle mappingstart\n"));
- close_knownsplice_limit_low = middle_mappingstart_last;
- close_mappingstart_last = middle_mappingstart_last;
- close_mappingstart_p = true;
- }
- #endif
- #ifdef USE_GREEDY
- if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_greedy) {
- knownsplice_limit_low = middle_mappingstart_last;
- mappingstart = middle_mappingstart_last;
- } else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
- knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
- mappingstart = close_mappingstart_last;
- }
- #else
- if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_last) {
- knownsplice_limit_low = middle_mappingstart_last;
- mappingstart = middle_mappingstart_last;
- }
- #endif
- if (close_mappingstart_p == false) {
- fallback_mappingstart_p = false;
- #ifdef USE_GREEDY
- } else if (mappingstart >= close_mappingstart_greedy) {
- fallback_mappingstart_p = false;
- #endif
- } else {
- debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
- fallback_mappingstart_p = true;
- }
- }
- }
-
- favor_right_p = true;
-
- } else {
- chroffset = Stage3end_chroffset(hit3);
- chrhigh = Stage3end_chrhigh(hit3);
- chrlength = Stage3end_chrlength(hit3);
-
- if (Shortread_find_primers(queryseq5,queryseq3) == true) {
- /* Go from genomicend */
- debug13(printf("Found primers\n"));
- genomicbound = Stage3end_genomicend(hit3);
-
- } else if (Stage3end_anomalous_splice_p(hit3) == true) {
- /* Go from genomicend */
- debug13(printf("Anomalous splice\n"));
- genomicbound = Stage3end_genomicend(hit3);
-
- } else {
- genomicbound = Stage3end_genomicstart(hit3);
-
- #if 0
- /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
- if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
- Stage3end_genomicbound_from_start(&genomicbound2,hit3,overlap,chroffset) == true) {
- debug13(printf("Found overlap of %d\n",overlap));
- if (genomicbound2 < genomicbound) {
- zero_offset = genomicbound - genomicbound2;
- genomicbound = genomicbound2;
- }
- }
- #endif
- }
-
- debug13(printf("Case 4: hit3 minus %s %u..%u (sensedir %d) => genomicbound %u\n",
- Stage3end_hittype_string(hit3),
- Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset,
- Stage3end_sensedir(hit3),genomicbound - chroffset));
-
- knownsplice_limit_low = mappingstart = segmentstart = genomicbound;
- knownsplice_limit_high = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chrhigh);
- segmentend = add_bounded(Stage3end_genomicstart(hit3),pairmax,chrhigh);
- #ifdef LONG_ENDSPLICES
- mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chrhigh);
- #else
- mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist_novelend,chrhigh);
- #endif
-
- close_mappingend_last = middle_mappingend_last = Stage3end_genomicstart(hit3);
- #ifdef USE_GREEDY
- close_mappingend_greedy = middle_mappingend_greedy = segmentend;
- #endif
-
- if (minus_nsegments > 0) {
- /* Use segments to bound */
- debug13(printf("Finding segments from segmentstart %u to segmentend %u (minus_nsegments %d)\n",
- segmentstart - chroffset,segmentend - chroffset,minus_nsegments));
- starti = endi = -1;
- i = binary_search_segments(0,minus_nsegments-1,minus_segments,segmentstart);
- while (i < minus_nsegments - 1 && minus_segments[i].diagonal == (Univcoord_T) -1) {
- i++;
- }
- starti = i;
- while (minus_segments[i].diagonal < segmentend) {
- endi = i;
- i++;
- }
- if (starti >= 0 && endi >= 0) {
- debug13(printf("starti = %d, endi = %d\n",starti,endi));
- assert(starti <= endi);
- for (i = starti; i <= endi; i++) {
- debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
- (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
- minus_segments[i].querypos5,minus_segments[i].querypos3));
- if (minus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
- /* Case 4. Missing start of query, so there could be a middle splice */
- debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
- minus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- #ifdef USE_GREEDY
- if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
- mappingpos > genomicbound) {
- middle_mappingend_greedy = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend greedy to %u\n",middle_mappingend_greedy - chroffset));
- }
- #endif
- #ifdef LONG_ENDSPLICES
- if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend to %u\n",middle_mappingend_last - chroffset));
- }
- #else
- if ((mappingpos = minus_segments[i].diagonal) > middle_mappingend_last) {
- /* Use > for NOT_GREEDY */
- middle_mappingend_last = mappingpos;
- middle_mappingend_p = true;
- debug13(printf(" Redefining middle mappingend to %u\n",middle_mappingend_last - chroffset));
- }
- #endif
-
- } else {
- debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
- minus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
- #ifdef USE_GREEDY
- if ((mappingpos = minus_segments[i].diagonal) < close_mappingend_greedy &&
- mappingpos > genomicbound) {
- close_mappingend_greedy = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend greedy to %u\n",close_mappingend_greedy - chroffset));
- }
- #endif
- if ((mappingpos = minus_segments[i].diagonal) > close_mappingend_last) {
- /* Use > for NOT_GREEDY */
- close_mappingend_last = mappingpos;
- close_mappingend_p = true;
- debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
- }
- }
- }
+ if (gmap_improvement_p == false) {
+ debug(printf("No GMAP improvement: Before remove_overlaps at cutoff level %d: %d\n",*cutoff_level,List_length(hits)));
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/true,/*finalp*/true);
+ hits = Stage3end_reject_trimlengths(hits);
+ hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/false,/*finalp*/true);
+ hits = Stage3end_resolve_multimapping(hits);
+ debug(printf("After remove_overlaps: %d\n",List_length(hits)));
- #ifdef USE_GREEDY
- if (close_mappingend_p == true) {
- close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
- } else if (middle_mappingend_p == true) {
- debug13(printf("Using middle mappingend\n"));
- close_knownsplice_limit_high = middle_mappingend_greedy;
- close_mappingend_greedy = middle_mappingend_greedy;
- close_mappingend_p = true;
- }
- #else
- if (close_mappingend_p == true) {
- close_knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
- } else if (middle_mappingend_p == true) {
- debug13(printf("Using middle mappingend\n"));
- close_knownsplice_limit_high = middle_mappingend_last;
- close_mappingend_last = middle_mappingend_last;
- close_mappingend_p = true;
- }
- #endif
- #ifdef USE_GREEDY
- if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_greedy) {
- knownsplice_limit_high = middle_mappingend_last;
- mappingend = middle_mappingend_last;
- } else if (close_mappingend_p == true && close_mappingend_last != close_mappingend_greedy) {
- knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
- mappingend = close_mappingend_last;
- }
- #else
- if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_last) {
- knownsplice_limit_high = middle_mappingend_last;
- mappingend = middle_mappingend_last;
- }
- #endif
- if (close_mappingend_p == false) {
- fallback_mappingend_p = false;
- #ifdef USE_GREEDY
- } else if (mappingend <= close_mappingend_greedy) {
- fallback_mappingend_p = false;
- #endif
- } else {
- debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
- fallback_mappingend_p = true;
- }
- }
- }
+ } else {
+ debug(printf("GMAP improvement: Before remove_overlaps at cutoff level %d: %d\n",*cutoff_level,List_length(hits)));
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/true,/*finalp*/false);
+ /* Don't reject based on trimlength until after GMAP improvements */
+ hits = Stage3end_remove_overlaps(hits,/*finalp*/false);
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/false,/*finalp*/false);
+ hits = Stage3end_resolve_multimapping(hits);
+ debug(printf("After remove_overlaps: %d\n",List_length(hits)));
- favor_right_p = true;
- }
+ hits = align_singleend_with_gmap(gmap_history,hits,this,query_compress_fwd,query_compress_rev,
+ accession,queryuc_ptr,querylength,query_lastpos,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel,*cutoff_level,
+ first_read_p);
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/true,/*finalp*/true);
+ hits = Stage3end_reject_trimlengths(hits);
+ hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
+ hits = Stage3end_optimal_score(hits,*cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+ querylength,/*keep_gmap_p*/false,/*finalp*/true);
+ hits = Stage3end_resolve_multimapping(hits);
+ }
- if ((sensedir = Stage3end_sensedir(hit3)) == SENSE_FORWARD) {
- sense_try = +1;
- } else if (sensedir == SENSE_ANTI) {
- sense_try = -1;
- } else {
- sense_try = 0;
- }
+ hits = Stage3end_remove_circular_alias(hits);
+ hits = Stage3end_remove_duplicates(hits); /* Aliases can cause duplicates */
+
+ List_free(&plus_anchor_segments);
+ List_free(&minus_anchor_segments);
+
+ return hits;
+}
+
+
+static Stage3end_T *
+single_read (int *npaths, int *first_absmq, int *second_absmq,
+ Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
+ int indexdb_size_threshold, Floors_T *floors_array,
+ double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+ bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
+ int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
+ Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ bool keep_floors_p) {
+ Stage3end_T *stage3array;
+ History_T gmap_history;
+ List_T hits = NULL;
+ T this = NULL;
+ int user_maxlevel;
+ int querylength, query_lastpos, cutoff_level;
+ char *queryuc_ptr, *quality_string;
+ Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
+
+#ifdef HAVE_ALLOCA
+ char *queryrc;
+#else
+ char queryrc[MAX_READLENGTH+1];
+#endif
+ querylength = Shortread_fulllength(queryseq);
+
+#ifndef HAVE_ALLOCA
+ if (querylength > MAX_READLENGTH) {
+ fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
+ Shortread_accession(queryseq),querylength,MAX_READLENGTH);
+ *npaths = 0;
+ return (Stage3end_T *) NULL;
+ }
+#endif
+
+ if (user_maxlevel_float < 0.0) {
+ user_maxlevel = -1;
+ } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
+ user_maxlevel = (int) rint(user_maxlevel_float * (double) querylength);
} else {
- abort();
+ user_maxlevel = (int) user_maxlevel_float;
}
-#ifdef OLD_GENOMICBOUND
- knownsplice_limit_low = genomicstart + querylength;
- knownsplice_limit_high = genomicend - querylength;
+ /* Limit search on repetitive sequences */
+ queryuc_ptr = Shortread_fullpointer_uc(queryseq);
+ quality_string = Shortread_quality_string(queryseq);
+ if (check_dinucleotides(queryuc_ptr,querylength) == false) {
+ user_maxlevel = 0;
+ }
+
+ query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength);
+ query_compress_rev = Compress_new_rev(queryuc_ptr,querylength);
+#ifdef HAVE_ALLOCA
+ queryrc = (char *) ALLOCA((querylength+1)*sizeof(int));
#endif
+ make_complement_buffered(queryrc,queryuc_ptr,querylength);
- if (close_mappingstart_p == true && close_mappingend_p == true) {
- debug13(printf("Halfmapping: Running gmap with close mappingstart and close mappingend\n"));
- hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
- hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,close_mappingstart_last,close_mappingend_last,
- close_knownsplice_limit_low,close_knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ this = Stage1_new(querylength);
+ query_lastpos = querylength - index1part;
- if (good_start_p == true && good_end_p == true) {
- /* Success */
- } else if (gmap_rerun_p == false) {
- debug13(printf("Skipping re-run of gmap\n"));
- } else if (/* require both ends to be good */ 0 && good_start_p == true) {
- if (fallback_mappingend_p == true) {
- debug13(printf("Halfmapping: Re-running gmap with close mappingstart only\n"));
- hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
- hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend,
- close_knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
- }
+ gmap_history = History_new();
+ hits = align_end(&cutoff_level,gmap_history,this,
+ query_compress_fwd,query_compress_rev,
+ Shortread_accession(queryseq),queryuc_ptr,queryrc,querylength,query_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ user_maxlevel,indel_penalty_middle,indel_penalty_end,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ keep_floors_p,/*genestrand*/0,/*first_read_p*/true);
+
+ if ((*npaths = List_length(hits)) == 0) {
+ stage3array = (Stage3end_T *) NULL;
+ } else {
+ stage3array = (Stage3end_T *) List_to_array_out(hits,NULL); List_free(&hits); /* Return value */
+ stage3array = Stage3end_eval_and_sort(&(*npaths),&(*first_absmq),&(*second_absmq),
+ stage3array,maxpaths_search,queryseq,queryuc_ptr,queryrc,
+ query_compress_fwd,query_compress_rev,
+ quality_string,/*displayp*/true);
+ }
+
+ History_free(&gmap_history);
+ Compress_free(&query_compress_fwd);
+ Compress_free(&query_compress_rev);
+ Stage1_free(&this,querylength);
+ return stage3array;
+}
- } else if (/* require both ends to be good */ 0 && good_end_p == true) {
- if (fallback_mappingstart_p == true) {
- debug13(printf("Halfmapping: Re-running gmap with close mappingend only\n"));
- hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
- hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last,
- knownsplice_limit_low,close_knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
- }
- } else {
- if (fallback_mappingstart_p == true && fallback_mappingend_p == true) {
- debug13(printf("Halfmapping: Re-running gmap with far mappingstart and mappingend\n"));
- hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
- hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
- /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
- query_compress_fwd,query_compress_rev,mappingstart,mappingend,
- knownsplice_limit_low,knownsplice_limit_high,
- watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
- }
- }
- } else if (close_mappingstart_p == true) {
- debug13(printf("Halfmapping: Running gmap with close mappingstart\n"));
- hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+/* Aligns one single-end read for the nonstranded tolerant modes.  The read is
+   tried with genestrand +1 and then +2, each on its own Stage1 object and only
+   when read_oligos returns a positive count; the two hit lists are appended
+   and filtered together.  Stores the hit count in *npaths and returns the hits
+   as an array, or NULL when none survive.  *first_absmq and *second_absmq are
+   only passed on to Stage3end_eval_and_sort, which runs when hits remain. */
+static Stage3end_T *
+single_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_absmq,
+                                  Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
+                                  int indexdb_size_threshold, Floors_T *floors_array,
+                                  double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+                                  bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
+                                  int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
+                                  Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+                                  Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+                                  Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, bool keep_floors_p) {
+  Stage3end_T *stage3array;
+  History_T gmap_history;
+  List_T hits, hits_geneplus = NULL, hits_geneminus = NULL;
+  T this_geneplus = NULL, this_geneminus = NULL;
+  int user_maxlevel;
+  /* align_end sets cutoff_level but is skipped when read_oligos finds nothing, so start at 0 */
+  int querylength, query_lastpos, cutoff_level = 0;
+  char *queryuc_ptr, *quality_string;
+  Compress_T query_compress_fwd = NULL, query_compress_rev = NULL;
+  bool allvalidp;
+
+#ifdef HAVE_ALLOCA
+  char *queryrc;
+#else
+  char queryrc[MAX_READLENGTH+1];
+#endif
+
+  querylength = Shortread_fulllength(queryseq);
+#ifndef HAVE_ALLOCA
+  if (querylength > MAX_READLENGTH) {
+    fprintf(stderr,"Read %s has length %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
+            Shortread_accession(queryseq),querylength,MAX_READLENGTH);
+    *npaths = 0;
+    return (Stage3end_T *) NULL;
+  }
+#endif
+
+  if (user_maxlevel_float < 0.0) {
+    user_maxlevel = -1;
+  } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
+    user_maxlevel = (int) rint(user_maxlevel_float * (double) querylength);
+  } else {
+    user_maxlevel = (int) user_maxlevel_float;
+  }
+
+  this_geneplus = Stage1_new(querylength);
+  this_geneminus = Stage1_new(querylength);
+  queryuc_ptr = Shortread_fullpointer_uc(queryseq);
+  quality_string = Shortread_quality_string(queryseq);
+  query_lastpos = querylength - index1part;
+
+  /* Limit search on repetitive sequences */
+  if (check_dinucleotides(queryuc_ptr,querylength) == false) {
+    user_maxlevel = 0;
+  }
+
+  query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength);
+  query_compress_rev = Compress_new_rev(queryuc_ptr,querylength);
+  gmap_history = History_new();
+#ifdef HAVE_ALLOCA
+  queryrc = (char *) ALLOCA((querylength+1)*sizeof(char));
+#endif
+  make_complement_buffered(queryrc,queryuc_ptr,querylength);
+
+  if (read_oligos(&allvalidp,this_geneplus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+1,
+                  /*first_read_p*/true) > 0) {
+    hits_geneplus = align_end(&cutoff_level,gmap_history,this_geneplus,query_compress_fwd,query_compress_rev,
+                              Shortread_accession(queryseq),queryuc_ptr,queryrc,querylength,query_lastpos,
+                              indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,oligoindices_major,oligoindices_minor,
+                              pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+                              user_maxlevel,indel_penalty_middle,indel_penalty_end,
+                              localsplicing_penalty,distantsplicing_penalty,min_shortend,
+                              allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+                              keep_floors_p,/*genestrand*/+1,/*first_read_p*/true);
+  }
+
+  /* NOTE(review): both passes write through &cutoff_level, so the filters below presumably use the +2 pass's value when it runs -- confirm */
+  if (read_oligos(&allvalidp,this_geneminus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+2,
+                  /*first_read_p*/true) > 0) {
+    hits_geneminus = align_end(&cutoff_level,gmap_history,this_geneminus,query_compress_fwd,query_compress_rev,
+                               Shortread_accession(queryseq),queryuc_ptr,queryrc,querylength,query_lastpos,
+                               indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,oligoindices_major,oligoindices_minor,
+                               pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+                               user_maxlevel,indel_penalty_middle,indel_penalty_end,
+                               localsplicing_penalty,distantsplicing_penalty,min_shortend,
+                               allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+                               keep_floors_p,/*genestrand*/+2,/*first_read_p*/true);
+  }
+
+  hits = List_append(hits_geneplus,hits_geneminus);
+  hits = Stage3end_optimal_score(hits,cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+                                 querylength,/*keep_gmap_p*/true,/*finalp*/true);
+  hits = Stage3end_reject_trimlengths(hits);
+  hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
+  hits = Stage3end_optimal_score(hits,cutoff_level,subopt_levels,query_compress_fwd,query_compress_rev,
+                                 querylength,/*keep_gmap_p*/false,/*finalp*/true);
+  hits = Stage3end_resolve_multimapping(hits);
+
+  if ((*npaths = List_length(hits)) == 0) {
+    stage3array = (Stage3end_T *) NULL;
+  } else {
+    stage3array = (Stage3end_T *) List_to_array_out(hits,NULL); List_free(&hits); /* Return value */
+    stage3array = Stage3end_eval_and_sort(npaths,first_absmq,second_absmq,
+                                          stage3array,maxpaths_search,queryseq,queryuc_ptr,queryrc,
+                                          query_compress_fwd,query_compress_rev,quality_string,/*displayp*/true);
+  }
+
+  History_free(&gmap_history);
+  Compress_free(&query_compress_fwd);
+  Compress_free(&query_compress_rev);
+  Stage1_free(&this_geneminus,querylength);
+  Stage1_free(&this_geneplus,querylength);
+  return stage3array;
+}
+
+
+/* Public entry point for aligning one single-end read.  Selects the worker
+   from the file-level mode setting and forwards every argument unchanged; an
+   unrecognized mode is reported on stderr and the program aborts. */
+Stage3end_T *
+Stage1_single_read (int *npaths, int *first_absmq, int *second_absmq,
+                    Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev,
+                    int indexdb_size_threshold, Floors_T *floors_array,
+                    double user_maxlevel_float, int indel_penalty_middle, int indel_penalty_end,
+                    bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
+                    int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
+                    Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+                    Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+                    Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+                    bool keep_floors_p) {
+  switch (mode) {
+  case STANDARD: case CMET_STRANDED: case ATOI_STRANDED: case TTOC_STRANDED:
+    return single_read(npaths,first_absmq,second_absmq,queryseq,
+                       indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+                       user_maxlevel_float,indel_penalty_middle,indel_penalty_end,
+                       allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+                       localsplicing_penalty,distantsplicing_penalty,min_shortend,oligoindices_major,oligoindices_minor,
+                       pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p);
+  /* Nonstranded modes: the worker below tries genestrand +1 and then +2 */
+  case CMET_NONSTRANDED: case ATOI_NONSTRANDED: case TTOC_NONSTRANDED:
+    return single_read_tolerant_nonstranded(npaths,first_absmq,second_absmq,queryseq,
+                                            indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+                                            user_maxlevel_float,indel_penalty_middle,indel_penalty_end,
+                                            allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+                                            localsplicing_penalty,distantsplicing_penalty,min_shortend,oligoindices_major,oligoindices_minor,
+                                            pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p);
+  default:
+    fprintf(stderr,"Do not recognize mode %d\n",mode);
+    abort();
+  }
+}
+
+
+
+/* #define HITARRAY_SHORTENDSPLICING 4 */
+/* #define HITARRAY_DISTANTSPLICING 4 */
+
+
+static List_T
+align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3,
+ Shortread_T queryseq5, Shortread_T queryseq3,
+ char *queryuc_ptr, int querylength, int query_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+ char *queryrc, bool invertedp,
+#endif
+ Compress_T query_compress_fwd, Compress_T query_compress_rev,
+ struct Segment_T *plus_segments, int plus_nsegments,
+ struct Segment_T *minus_segments, int minus_nsegments,
+ Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+ Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+ Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+ Chrpos_T pairmax, Chrpos_T shortsplicedist, int user_maxlevel,
+ int genestrand, bool first_read_p) {
+ List_T hits = NULL;
+ int sensedir, sense_try;
+
+ int zero_offset = 0;
+ Univcoord_T segmentstart, segmentend;
+ Univcoord_T genomicbound, mappingstart, mappingend,
+ chroffset, chrhigh, mappingpos;
+#ifdef USE_GREEDY
+ Univcoord_T close_mappingstart_greedy, close_mappingend_greedy,
+ middle_mappingstart_greedy, middle_mappingend_greedy;
+#endif
+ Univcoord_T close_mappingstart_last, close_mappingend_last,
+ middle_mappingstart_last, middle_mappingend_last;
+ Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
+ Univcoord_T close_knownsplice_limit_low, close_knownsplice_limit_high;
+ Chrpos_T chrlength;
+ Chrnum_T chrnum;
+ bool close_mappingstart_p = false, close_mappingend_p = false;
+ bool middle_mappingstart_p = false, middle_mappingend_p = false;
+ bool fallback_mappingstart_p, fallback_mappingend_p;
+ bool good_start_p, good_end_p, watsonp, favor_right_p;
+
+ int starti, endi, i;
+
+ if (hit3 == NULL) {
+ /* Both events are tested by Stage3end_anomalous_splice_p */
+ if ((chrnum = Stage3end_chrnum(hit5)) == 0) {
+ /* Translocation */
+ return (List_T) NULL;
+
+ } else if (Stage3end_hittype(hit5) == SAMECHR_SPLICE) {
+ /* A genomic event that doesn't get reflected in chrnum */
+ return (List_T) NULL;
+
+ } else if ((watsonp = Stage3end_plusp(hit5)) == true) {
+ chroffset = Stage3end_chroffset(hit5);
+ chrhigh = Stage3end_chrhigh(hit5);
+ chrlength = Stage3end_chrlength(hit5);
+
+ if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+ /* Go from genomicstart */
+ debug13(printf("Found primers\n"));
+ genomicbound = Stage3end_genomicstart(hit5);
+
+ } else if (Stage3end_anomalous_splice_p(hit5) == true) {
+ /* Go from genomicstart */
+ debug13(printf("Anomalous splice\n"));
+ genomicbound = Stage3end_genomicstart(hit5);
+
+ } else {
+ genomicbound = Stage3end_genomicend(hit5);
+
+#if 0
+ /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
+ if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
+ Stage3end_genomicbound_from_end(&genomicbound2,hit5,overlap,chroffset) == true) {
+ debug13(printf("Found overlap of %d\n",overlap));
+ if (genomicbound2 < genomicbound) {
+ zero_offset = genomicbound - genomicbound2;
+ genomicbound = genomicbound2;
+ }
+ }
+#endif
+ }
+
+ debug13(printf("Case 1: hit5 plus %s %u..%u (sensedir %d) => genomicbound %u\n",
+ Stage3end_hittype_string(hit5),
+ Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset,
+ Stage3end_sensedir(hit5),genomicbound - chroffset));
+
+ knownsplice_limit_low = mappingstart = segmentstart = genomicbound;
+ knownsplice_limit_high = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chrhigh);
+ segmentend = add_bounded(Stage3end_genomicend(hit5),pairmax,chrhigh);
+#ifdef LONG_ENDSPLICES
+ mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chrhigh);
+#else
+ mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist_novelend,chrhigh);
+ debug13(printf("Original bounds E: knownsplice_limit_low %u, knownsplice_limit_high %u, mappingend %u\n",
+ knownsplice_limit_low - chroffset,knownsplice_limit_high - chroffset,mappingend - chroffset));
+#endif
+
+ close_mappingend_last = middle_mappingend_last = Stage3end_genomicend(hit5);
+#ifdef USE_GREEDY
+ close_mappingend_greedy = middle_mappingend_greedy = segmentend;
+#endif
+
+ if (plus_nsegments > 0) {
+ /* Use segments to bound */
+ debug13(printf("Finding segments from segmentstart %u to segmentend %u (plus_nsegments %d)\n",
+ segmentstart - chroffset,segmentend - chroffset,plus_nsegments));
+ starti = endi = -1;
+ i = binary_search_segments(0,plus_nsegments-1,plus_segments,segmentstart);
+ while (i < plus_nsegments - 1 && plus_segments[i].diagonal == (Univcoord_T) -1) {
+ i++;
+ }
+ starti = i;
+ while (plus_segments[i].diagonal < segmentend) {
+ endi = i;
+ i++;
+ }
+ if (starti >= 0 && endi >= 0) {
+ debug13(printf("starti = %d, endi = %d\n",starti,endi));
+ assert(starti <= endi);
+ for (i = starti; i <= endi; i++) {
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
+ plus_segments[i].querypos5,plus_segments[i].querypos3));
+ if (query_lastpos - plus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 1. Missing end of query, so there could be a middle splice */
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ query_lastpos,plus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
+#ifdef USE_GREEDY
+ if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
+ mappingpos > genomicbound) {
+ middle_mappingend_greedy = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend greedy to %u\n",middle_mappingend_greedy - chroffset));
+ }
+#endif
+
+#ifdef LONG_ENDSPLICES
+ if ((mappingpos = add_bounded(plus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
+ }
+#else
+ if ((mappingpos = plus_segments[i].diagonal) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend last to %u\n",middle_mappingend_last - chroffset));
+ }
+#endif
+
+ } else {
+ debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
+ query_lastpos,plus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
+#ifdef USE_GREEDY
+ if ((mappingpos = plus_segments[i].diagonal) < close_mappingend_greedy &&
+ mappingpos > genomicbound) {
+ close_mappingend_greedy = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend greedy to %u\n",close_mappingend_greedy - chroffset));
+ }
+#endif
+ if ((mappingpos = plus_segments[i].diagonal) > close_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ close_mappingend_last = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
+ }
+ }
+ }
+
+#ifdef USE_GREEDY
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ debug13(printf("Using middle mappingend\n"));
+ close_knownsplice_limit_high = middle_mappingend_greedy;
+ close_mappingend_greedy = middle_mappingend_greedy;
+ close_mappingend_p = true;
+ }
+#else
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ debug13(printf("Using middle mappingend\n"));
+ close_knownsplice_limit_high = middle_mappingend_last;
+ close_mappingend_last = middle_mappingend_last;
+ close_mappingend_p = true;
+ }
+#endif
+#ifdef USE_GREEDY
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_greedy) {
+ knownsplice_limit_high = middle_mappingend_last;
+ mappingend = middle_mappingend_last;
+ } else if (close_mappingend_p == true && close_mappingend_last != close_mappingend_greedy) {
+ knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ mappingend = close_mappingend_last;
+ }
+#else
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_last) {
+ knownsplice_limit_high = middle_mappingend_last;
+ mappingend = middle_mappingend_last;
+ }
+#endif
+
+ if (close_mappingend_p == false) {
+ fallback_mappingend_p = false;
+#ifdef USE_GREEDY
+ } else if (mappingend <= close_mappingend_greedy) {
+ fallback_mappingend_p = false;
+#endif
+ } else {
+ debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
+ fallback_mappingend_p = true;
+ }
+ }
+ }
+
+ favor_right_p = false;
+
+ } else {
+ chroffset = Stage3end_chroffset(hit5);
+ chrhigh = Stage3end_chrhigh(hit5);
+ chrlength = Stage3end_chrlength(hit5);
+
+ if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+ /* Go from genomicstart */
+ debug13(printf("Found primers\n"));
+ genomicbound = Stage3end_genomicstart(hit5);
+
+ } else if (Stage3end_anomalous_splice_p(hit5) == true) {
+ /* Go from genomicstart */
+ debug13(printf("Anomalous splice\n"));
+ genomicbound = Stage3end_genomicstart(hit5);
+
+ } else {
+ genomicbound = Stage3end_genomicend(hit5);
+
+#if 0
+ /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
+ if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
+ Stage3end_genomicbound_from_end(&genomicbound2,hit5,overlap,chroffset) == true) {
+ debug13(printf("Found overlap of %d\n",overlap));
+ if (genomicbound2 > genomicbound) {
+ zero_offset = genomicbound2 - genomicbound;
+ genomicbound = genomicbound2;
+ }
+ }
+#endif
+ }
+
+ debug13(printf("Case 2: hit5 minus %s %u..%u (sensedir %d) => genomicbound %u\n",
+ Stage3end_hittype_string(hit5),
+ Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset,
+ Stage3end_sensedir(hit5),genomicbound - chroffset));
+
+ knownsplice_limit_high = mappingend = segmentend = genomicbound;
+ knownsplice_limit_low = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chroffset);
+ segmentstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax,chroffset);
+#ifdef LONG_ENDSPLICES
+ mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist,chroffset);
+#else
+ mappingstart = subtract_bounded(Stage3end_genomicend(hit5),pairmax + shortsplicedist_novelend,chroffset);
+#endif
+ debug13(printf("Original bounds F: knownsplice_limit_low %u, knownsplice_limit_high %u, mappingstart %u\n",
+ knownsplice_limit_low - chroffset,knownsplice_limit_high - chroffset,mappingstart - chroffset));
+
+ close_mappingstart_last = middle_mappingstart_last = Stage3end_genomicend(hit5);
+#ifdef USE_GREEDY
+ close_mappingstart_greedy = middle_mappingstart_greedy = segmentstart;
+#endif
+
+ if (minus_nsegments > 0) {
+ /* Use segments to bound */
+ debug13(printf("Finding segments from segmentstart %u to segmentend %u (minus_nsegments %d)\n",
+ segmentstart - chroffset,segmentend - chroffset,minus_nsegments));
+ starti = endi = -1;
+ i = binary_search_segments(0,minus_nsegments-1,minus_segments,segmentend);
+ while (i >= 0 && minus_segments[i].diagonal >= segmentend) {
+ i--;
+ }
+ starti = i;
+ while (i >= 0 && minus_segments[i].diagonal > segmentstart) {
+ if (minus_segments[i].diagonal < (Univcoord_T) -1) {
+ endi = i;
+ }
+ i--;
+ }
+ if (starti >= 0 && endi >= 0) {
+ debug13(printf("starti = %d, endi = %d\n",starti,endi));
+ assert(starti >= endi);
+ for (i = starti; i >= endi; i--) {
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
+ minus_segments[i].querypos5,minus_segments[i].querypos3));
+ if (query_lastpos - minus_segments[i].querypos3 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 2. Missing end of query, so there could be a middle splice */
+ debug13b(printf(" query_lastpos %d - querypos3 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ query_lastpos,minus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
+#ifdef USE_GREEDY
+ if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
+ mappingpos < genomicbound) {
+ middle_mappingstart_greedy = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart greedy to %u\n",middle_mappingstart_greedy - chroffset));
+ }
+#endif
+#ifdef LONG_ENDSPLICES
+ if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
+#else
+ if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
+#endif
+
+ } else {
+ debug13b(printf(" query_lastpos %d - querypos3 %d < %d + %d, so using this diagonal\n",
+ query_lastpos,minus_segments[i].querypos3,STAGE2_MIN_OLIGO,index1interval));
+#ifdef USE_GREEDY
+ if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength,chroffset)) > close_mappingstart_greedy &&
+ mappingpos < genomicbound) {
+ close_mappingstart_greedy = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart greedy to %u\n",close_mappingstart_greedy - chroffset));
+ }
+#endif
+ if ((mappingpos = subtract_bounded(minus_segments[i].diagonal,querylength,chroffset)) < close_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ close_mappingstart_last = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
+ }
+ }
+ }
+
+#ifdef USE_GREEDY
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_greedy;
+ close_mappingstart_greedy = middle_mappingstart_greedy;
+ close_mappingstart_p = true;
+ }
+#else
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_last;
+ close_mappingstart_last = middle_mappingstart_last;
+ close_mappingstart_p = true;
+ }
+#endif
+#ifdef USE_GREEDY
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_greedy) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ mappingstart = middle_mappingstart_last;
+ } else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
+ knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ mappingstart = close_mappingstart_last;
+ }
+#else
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_last) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ mappingstart = middle_mappingstart_last;
+ }
+#endif
+ if (close_mappingstart_p == false) {
+ fallback_mappingstart_p = false;
+#ifdef USE_GREEDY
+ } else if (mappingstart >= close_mappingstart_greedy) {
+ fallback_mappingstart_p = false;
+#endif
+ } else {
+ debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
+ fallback_mappingstart_p = true;
+ }
+ }
+ }
+
+ favor_right_p = false;
+ }
+
+ if ((sensedir = Stage3end_sensedir(hit5)) == SENSE_FORWARD) {
+ sense_try = +1;
+ } else if (sensedir == SENSE_ANTI) {
+ sense_try = -1;
+ } else {
+ sense_try = 0;
+ }
+
+ } else if (hit5 == NULL) {
+ /* Both events are tested by Stage3end_anomalous_splice_p */
+ if ((chrnum = Stage3end_chrnum(hit3)) == 0) {
+ /* Translocation */
+ return (List_T) NULL;
+
+ } else if (Stage3end_hittype(hit3) == SAMECHR_SPLICE) {
+ /* A genomic event that doesn't get reflected in chrnum */
+ return (List_T) NULL;
+
+ } else if ((watsonp = Stage3end_plusp(hit3)) == true) {
+ chroffset = Stage3end_chroffset(hit3);
+ chrhigh = Stage3end_chrhigh(hit3);
+ chrlength = Stage3end_chrlength(hit3);
+
+ if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+ /* Go from genomicend */
+ debug13(printf("Found primers\n"));
+ genomicbound = Stage3end_genomicend(hit3);
+
+ } else if (Stage3end_anomalous_splice_p(hit3) == true) {
+ /* Go from genomicend */
+ debug13(printf("Anomalous splice\n"));
+ genomicbound = Stage3end_genomicend(hit3);
+
+ } else {
+ genomicbound = Stage3end_genomicstart(hit3);
+
+#if 0
+ /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
+ if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
+ Stage3end_genomicbound_from_start(&genomicbound2,hit3,overlap,chroffset) == true) {
+ debug13(printf("Found overlap of %d\n",overlap));
+ if (genomicbound2 > genomicbound) {
+ zero_offset = genomicbound2 - genomicbound;
+ genomicbound = genomicbound2;
+ }
+ }
+#endif
+ }
+
+ debug13(printf("Case 3: hit3 plus %s %u..%u (sensedir %d) => genomicbound %u\n",
+ Stage3end_hittype_string(hit3),
+ Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset,
+ Stage3end_sensedir(hit3),genomicbound - chroffset));
+
+ knownsplice_limit_high = mappingend = segmentend = genomicbound;
+ knownsplice_limit_low = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chroffset);
+ segmentstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax,chroffset);
+#ifdef LONG_ENDSPLICES
+ mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chroffset);
+#else
+ mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist_novelend,chroffset);
+#endif
+
+ close_mappingstart_last = middle_mappingstart_last = Stage3end_genomicstart(hit3);
+#ifdef USE_GREEDY
+ close_mappingstart_greedy = middle_mappingstart_greedy = segmentstart;
+#endif
+
+ if (plus_nsegments > 0) {
+ /* Use segments to bound */
+ debug13(printf("Finding segments from segmentstart %u to segmentend %u (plus_nsegments %d)\n",
+ segmentstart - chroffset,segmentend - chroffset,plus_nsegments));
+ starti = endi = -1;
+ i = binary_search_segments(0,plus_nsegments-1,plus_segments,segmentend);
+ while (i >= 0 && plus_segments[i].diagonal >= segmentend) {
+ i--;
+ }
+ starti = i;
+ while (i >= 0 && plus_segments[i].diagonal > segmentstart) {
+ if (plus_segments[i].diagonal < (Univcoord_T) -1) {
+ endi = i;
+ }
+ i--;
+ }
+ if (starti >= 0 && endi >= 0) {
+ debug13(printf("starti = %d, endi = %d\n",starti,endi));
+ assert(starti >= endi);
+ for (i = starti; i >= endi; i--) {
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (plus_segments[i].diagonal - chroffset),(unsigned long long) plus_segments[i].diagonal,
+ plus_segments[i].querypos5,plus_segments[i].querypos3));
+ if (plus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 3. Missing start of query, so there could be a middle splice */
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ plus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
+#ifdef USE_GREEDY
+ if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist_novelend,chroffset)) > middle_mappingstart_greedy &&
+ mappingpos < genomicbound) {
+ middle_mappingstart_greedy = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart greedy to %u\n",middle_mappingstart_greedy - chroffset));
+ }
+#endif
+#ifdef LONG_ENDSPLICES
+ if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength + shortsplicedist,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
+#else
+ if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength,chroffset)) < middle_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ middle_mappingstart_last = mappingpos;
+ middle_mappingstart_p = true;
+ debug13(printf(" Redefining middle mappingstart last to %u\n",middle_mappingstart_last - chroffset));
+ }
+#endif
+
+ } else {
+ debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
+ plus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
+#ifdef USE_GREEDY
+ if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength,chroffset)) > close_mappingstart_greedy &&
+ mappingpos < genomicbound) {
+ close_mappingstart_greedy = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart greedy to %u\n",close_mappingstart_greedy - chroffset));
+ }
+#endif
+ if ((mappingpos = subtract_bounded(plus_segments[i].diagonal,querylength,chroffset)) < close_mappingstart_last) {
+ /* Use < for NOT_GREEDY */
+ close_mappingstart_last = mappingpos;
+ close_mappingstart_p = true;
+ debug13(printf(" Redefining close mappingstart last to %u\n",close_mappingstart_last - chroffset));
+ }
+ }
+ }
+
+#ifdef USE_GREEDY
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_greedy,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_greedy;
+ close_mappingstart_greedy = middle_mappingstart_greedy;
+ close_mappingstart_p = true;
+ }
+#else
+ if (close_mappingstart_p == true) {
+ close_knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ } else if (middle_mappingstart_p == true) {
+ debug13(printf("Using middle mappingstart\n"));
+ close_knownsplice_limit_low = middle_mappingstart_last;
+ close_mappingstart_last = middle_mappingstart_last;
+ close_mappingstart_p = true;
+ }
+#endif
+#ifdef USE_GREEDY
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_greedy) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ mappingstart = middle_mappingstart_last;
+ } else if (close_mappingstart_p == true && close_mappingstart_last != close_mappingstart_greedy) {
+ knownsplice_limit_low = subtract_bounded(close_mappingstart_last,shortsplicedist,chroffset);
+ mappingstart = close_mappingstart_last;
+ }
+#else
+ if (middle_mappingstart_p == true && middle_mappingstart_last < close_mappingstart_last) {
+ knownsplice_limit_low = middle_mappingstart_last;
+ mappingstart = middle_mappingstart_last;
+ }
+#endif
+ if (close_mappingstart_p == false) {
+ fallback_mappingstart_p = false;
+#ifdef USE_GREEDY
+ } else if (mappingstart >= close_mappingstart_greedy) {
+ fallback_mappingstart_p = false;
+#endif
+ } else {
+ debug13(printf("Fallback mappingstart = %u\n",mappingstart - chroffset));
+ fallback_mappingstart_p = true;
+ }
+ }
+ }
+
+ favor_right_p = true;
+
+ } else {
+ chroffset = Stage3end_chroffset(hit3);
+ chrhigh = Stage3end_chrhigh(hit3);
+ chrlength = Stage3end_chrlength(hit3);
+
+ if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+ /* Go from genomicend */
+ debug13(printf("Found primers\n"));
+ genomicbound = Stage3end_genomicend(hit3);
+
+ } else if (Stage3end_anomalous_splice_p(hit3) == true) {
+ /* Go from genomicend */
+ debug13(printf("Anomalous splice\n"));
+ genomicbound = Stage3end_genomicend(hit3);
+
+ } else {
+ genomicbound = Stage3end_genomicstart(hit3);
+
+#if 0
+ /* TODO: Previously called Shortread_find_overlap. Now with Shortread_max_overlap, can optimize this code */
+ if ((overlap = Shortread_max_overlap(queryseq5,queryseq3)) > 0 &&
+ Stage3end_genomicbound_from_start(&genomicbound2,hit3,overlap,chroffset) == true) {
+ debug13(printf("Found overlap of %d\n",overlap));
+ if (genomicbound2 < genomicbound) {
+ zero_offset = genomicbound - genomicbound2;
+ genomicbound = genomicbound2;
+ }
+ }
+#endif
+ }
+
+ debug13(printf("Case 4: hit3 minus %s %u..%u (sensedir %d) => genomicbound %u\n",
+ Stage3end_hittype_string(hit3),
+ Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset,
+ Stage3end_sensedir(hit3),genomicbound - chroffset));
+
+ knownsplice_limit_low = mappingstart = segmentstart = genomicbound;
+ knownsplice_limit_high = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chrhigh);
+ segmentend = add_bounded(Stage3end_genomicstart(hit3),pairmax,chrhigh);
+#ifdef LONG_ENDSPLICES
+ mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist,chrhigh);
+#else
+ mappingend = add_bounded(Stage3end_genomicstart(hit3),pairmax + shortsplicedist_novelend,chrhigh);
+#endif
+
+ close_mappingend_last = middle_mappingend_last = Stage3end_genomicstart(hit3);
+#ifdef USE_GREEDY
+ close_mappingend_greedy = middle_mappingend_greedy = segmentend;
+#endif
+
+ if (minus_nsegments > 0) {
+ /* Use segments to bound */
+ debug13(printf("Finding segments from segmentstart %u to segmentend %u (minus_nsegments %d)\n",
+ segmentstart - chroffset,segmentend - chroffset,minus_nsegments));
+ starti = endi = -1; /* -1 means no segment index has been chosen yet */
+ i = binary_search_segments(0,minus_nsegments-1,minus_segments,segmentstart);
+ while (i < minus_nsegments - 1 && minus_segments[i].diagonal == (Univcoord_T) -1) { /* step past segments whose diagonal is the (Univcoord_T) -1 value */
+ i++;
+ }
+ starti = i;
+ while (minus_segments[i].diagonal < segmentend) { /* NOTE(review): i is not bounded by minus_nsegments here; presumably relies on a trailing segment with diagonal (Univcoord_T) -1 -- confirm */
+ endi = i;
+ i++;
+ }
+ if (starti >= 0 && endi >= 0) {
+ debug13(printf("starti = %d, endi = %d\n",starti,endi));
+ assert(starti <= endi);
+ for (i = starti; i <= endi; i++) {
+ debug13(printf("diagonal %u (%llu), querypos %d..%d\n",
+ (Chrpos_T) (minus_segments[i].diagonal - chroffset),(unsigned long long) minus_segments[i].diagonal,
+ minus_segments[i].querypos5,minus_segments[i].querypos3));
+ if (minus_segments[i].querypos5 >= STAGE2_MIN_OLIGO + index1interval) {
+ /* Case 4. Missing start of query, so there could be a middle splice */
+ debug13b(printf(" querypos5 %d >= %d + %d, so using this diagonal plus shortsplicedist\n",
+ minus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
+#ifdef USE_GREEDY
+ if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist_novelend,chrhigh)) < middle_mappingend_greedy &&
+ mappingpos > genomicbound) {
+ middle_mappingend_greedy = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend greedy to %u\n",middle_mappingend_greedy - chroffset));
+ }
+#endif
+#ifdef LONG_ENDSPLICES
+ if ((mappingpos = add_bounded(minus_segments[i].diagonal,shortsplicedist,chrhigh)) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend to %u\n",middle_mappingend_last - chroffset));
+ }
+#else
+ if ((mappingpos = minus_segments[i].diagonal) > middle_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ middle_mappingend_last = mappingpos;
+ middle_mappingend_p = true;
+ debug13(printf(" Redefining middle mappingend to %u\n",middle_mappingend_last - chroffset));
+ }
+#endif
+
+ } else {
+ debug13b(printf(" querypos5 %d < %d + %d, so using this diagonal\n",
+ minus_segments[i].querypos5,STAGE2_MIN_OLIGO,index1interval));
+#ifdef USE_GREEDY
+ if ((mappingpos = minus_segments[i].diagonal) < close_mappingend_greedy &&
+ mappingpos > genomicbound) {
+ close_mappingend_greedy = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend greedy to %u\n",close_mappingend_greedy - chroffset));
+ }
+#endif
+ if ((mappingpos = minus_segments[i].diagonal) > close_mappingend_last) {
+ /* Use > for NOT_GREEDY */
+ close_mappingend_last = mappingpos;
+ close_mappingend_p = true;
+ debug13(printf(" Redefining close mappingend last to %u\n",close_mappingend_last - chroffset));
+ }
+ }
+ }
+
+#ifdef USE_GREEDY
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_greedy,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ debug13(printf("Using middle mappingend\n"));
+ close_knownsplice_limit_high = middle_mappingend_greedy;
+ close_mappingend_greedy = middle_mappingend_greedy;
+ close_mappingend_p = true;
+ }
+#else
+ if (close_mappingend_p == true) {
+ close_knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ } else if (middle_mappingend_p == true) {
+ debug13(printf("Using middle mappingend\n"));
+ close_knownsplice_limit_high = middle_mappingend_last;
+ close_mappingend_last = middle_mappingend_last;
+ close_mappingend_p = true;
+ }
+#endif
+#ifdef USE_GREEDY
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_greedy) {
+ knownsplice_limit_high = middle_mappingend_last;
+ mappingend = middle_mappingend_last;
+ } else if (close_mappingend_p == true && close_mappingend_last != close_mappingend_greedy) {
+ knownsplice_limit_high = add_bounded(close_mappingend_last,shortsplicedist,chrhigh);
+ mappingend = close_mappingend_last;
+ }
+#else
+ if (middle_mappingend_p == true && middle_mappingend_last > close_mappingend_last) {
+ knownsplice_limit_high = middle_mappingend_last;
+ mappingend = middle_mappingend_last;
+ }
+#endif
+ if (close_mappingend_p == false) {
+ fallback_mappingend_p = false;
+#ifdef USE_GREEDY
+ } else if (mappingend <= close_mappingend_greedy) {
+ fallback_mappingend_p = false;
+#endif
+ } else {
+ debug13(printf("Fallback mappingend = %u\n",mappingend - chroffset));
+ fallback_mappingend_p = true;
+ }
+ }
+ }
+
+ favor_right_p = true;
+ }
+
+ if ((sensedir = Stage3end_sensedir(hit3)) == SENSE_FORWARD) {
+ sense_try = +1;
+ } else if (sensedir == SENSE_ANTI) {
+ sense_try = -1;
+ } else {
+ sense_try = 0;
+ }
+
+ } else {
+ abort();
+ }
+
+#ifdef OLD_GENOMICBOUND
+ knownsplice_limit_low = genomicstart + querylength;
+ knownsplice_limit_high = genomicend - querylength;
+#endif
+
+ if (close_mappingstart_p == true && close_mappingend_p == true) {
+ debug13(printf("Halfmapping: Running gmap with close mappingstart and close mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,close_mappingstart_last,close_mappingend_last,
+ close_knownsplice_limit_low,close_knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+
+ if (good_start_p == true && good_end_p == true) {
+ /* Success: both ends were good on the first run, so no re-run is attempted */
+ } else if (gmap_rerun_p == false) {
+ debug13(printf("Skipping re-run of gmap\n"));
+ } else if (/* branch disabled by the constant 0, so that both ends are required to be good */ 0 && good_start_p == true) {
+ if (fallback_mappingend_p == true) {
+ debug13(printf("Halfmapping: Re-running gmap with close mappingstart only\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend,
+ close_knownsplice_limit_low,knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+
+ } else if (/* branch disabled by the constant 0, so that both ends are required to be good */ 0 && good_end_p == true) {
+ if (fallback_mappingstart_p == true) {
+ debug13(printf("Halfmapping: Re-running gmap with close mappingend only\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last,
+ knownsplice_limit_low,close_knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+ } else { /* at least one end was not good: re-run with mappingstart/mappingend only if both fallback flags are set */
+ if (fallback_mappingstart_p == true && fallback_mappingend_p == true) {
+ debug13(printf("Halfmapping: Re-running gmap with far mappingstart and mappingend\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+ hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+ /*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
+ query_compress_fwd,query_compress_rev,mappingstart,mappingend,
+ knownsplice_limit_low,knownsplice_limit_high,
+ watsonp,genestrand,first_read_p,chrnum,chroffset,chrhigh,chrlength,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,user_maxlevel);
+ }
+ }
+
+ } else if (close_mappingstart_p == true) {
+ debug13(printf("Halfmapping: Running gmap with close mappingstart\n"));
+ hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
/*paired_favor_mode*/favor_right_p == true ? +1 : -1,zero_offset,
query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend,
@@ -17459,7 +17519,6 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
Stage3end_T hit5, hit3, gmap5, gmap3;
List_T p, a, b, rest;
int genestrand;
- int missing_hit, missing_gmap;
int i;
bool replacedp;
@@ -17800,8 +17859,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
int user_maxlevel_5, int user_maxlevel_3, int indel_penalty_middle, int indel_penalty_end,
int localsplicing_penalty, int distantsplicing_penalty, int min_shortend,
bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
- bool allvalidp5, bool allvalidp3, Chrpos_T pairmax,
- int maxpairedpaths, bool keep_floors_p, Shortread_T queryseq5, Shortread_T queryseq3,
+ Chrpos_T pairmax, int maxpairedpaths, bool keep_floors_p, Shortread_T queryseq5, Shortread_T queryseq3,
int genestrand) {
List_T hitpairs = NULL, p;
@@ -17811,15 +17869,15 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
List_T hitarray5[HITARRAY_N], hitarray3[HITARRAY_N];
List_T plus_anchor_segments_5 = NULL, minus_anchor_segments_5 = NULL, plus_anchor_segments_3 = NULL, minus_anchor_segments_3 = NULL;
List_T greedy5 = NULL, subs5 = NULL, terminals5 = NULL,
- indels5 = NULL, ambiguous5 = NULL, singlesplicing5 = NULL, doublesplicing5 = NULL,
+ indels5 = NULL, singlesplicing5 = NULL, doublesplicing5 = NULL,
distantsplicing5 = NULL, gmap5_hits = NULL;
List_T greedy3 = NULL, subs3 = NULL, terminals3 = NULL,
- indels3 = NULL, ambiguous3 = NULL, singlesplicing3 = NULL, doublesplicing3 = NULL,
+ indels3 = NULL, singlesplicing3 = NULL, doublesplicing3 = NULL,
distantsplicing3 = NULL, gmap3_hits = NULL;
List_T longsinglesplicing5 = NULL, longsinglesplicing3 = NULL;
int nmisses_allowed_sarray_5, nmisses_allowed_sarray_3;
int ignore_found_score, done_level_5, done_level_3, opt_level, fast_level_5, fast_level_3,
- mismatch_level_5, mismatch_level_3, nmismatches, max_mismatches_allowed;
+ mismatch_level_5, mismatch_level_3, nmismatches;
int max_splice_mismatches_5 = -1, max_splice_mismatches_3 = -1, i;
int nhits5 = 0, nhits3 = 0, nsplicepairs5 = 0, nsplicepairs3 = 0;
List_T *donors_plus_5, *antidonors_plus_5, *acceptors_plus_5, *antiacceptors_plus_5,
@@ -17832,11 +17890,13 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
bool any_omitted_p_5, any_omitted_p_3;
Floors_T floors5, floors3;
bool alloc_floors_p_5 = false, alloc_floors_p_3 = false, floors5_computed_p = false, floors3_computed_p = false,
- segments5_computed_p = false, segments3_computed_p = false, alloc5p, alloc3p;
+ segments5_computed_p = false, segments3_computed_p = false;
int best_score_paired;
bool found_terminals_p = false;
int nconcordant = 0, nsamechr = 0;
Indexdb_T plus_indexdb_5, plus_indexdb_3, minus_indexdb_5, minus_indexdb_3;
+ bool allvalidp5, allvalidp3;
+
if (genestrand == +2) {
plus_indexdb_5 = indexdb_rev;
@@ -17860,13 +17920,25 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
*found_score = querylength5 + querylength3;
ignore_found_score = querylength5 + querylength3;
- fast_level_5 = (querylength5 + index1interval - 1)/spansize - NREQUIRED_FAST;
- fast_level_3 = (querylength3 + index1interval - 1)/spansize - NREQUIRED_FAST;
+ if (querylength5 < min_kmer_readlength) {
+ fast_level_5 = querylength5 - 1 - NREQUIRED_FAST;
+ debug(printf("fast_level_5 %d = querylength %d - 1 - nrequired_fast %d\n",
+ fast_level_5,querylength5,NREQUIRED_FAST));
+ } else {
+ fast_level_5 = (querylength5 + index1interval - 1)/spansize - NREQUIRED_FAST;
+ debug(printf("fast_level_5 %d = (querylength %d + index1interval %d - 1)/spansize %d - nrequired_fast %d\n",
+ fast_level_5,querylength5,index1interval,spansize,NREQUIRED_FAST));
+ }
- debug(printf("fast_level_5 %d = (querylength %d + index1interval %d - 1)/spansize %d - nrequired_fast %d\n",
- fast_level_5,querylength5,index1interval,spansize,NREQUIRED_FAST));
- debug(printf("fast_level_3 %d = (querylength %d + index1interval %d - 1)/spansize %d - nrequired_fast %d\n",
- fast_level_3,querylength3,index1interval,spansize,NREQUIRED_FAST));
+ if (querylength3 < min_kmer_readlength) {
+ fast_level_3 = querylength3 - 1 - NREQUIRED_FAST;
+ debug(printf("fast_level_3 %d = querylength %d - 1 - nrequired_fast %d\n",
+ fast_level_3,querylength3,NREQUIRED_FAST));
+ } else {
+ fast_level_3 = (querylength3 + index1interval - 1)/spansize - NREQUIRED_FAST;
+ debug(printf("fast_level_3 %d = (querylength %d + index1interval %d - 1)/spansize %d - nrequired_fast %d\n",
+ fast_level_3,querylength3,index1interval,spansize,NREQUIRED_FAST));
+ }
#if 0
/* This prevents complete_mm procedure, needed for short reads */
@@ -18041,6 +18113,13 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
}
#endif
+ if (querylength5 < min_kmer_readlength) {
+ spanningset5p = false;
+ }
+ if (querylength3 < min_kmer_readlength) {
+ spanningset3p = false;
+ }
+
/* Search 2: Exact/subs via spanning set algorithm */
if (spanningset5p == true || spanningset3p == true) {
/* 1A. Exact. Requires compress if cmet or genomealt. Creates and uses spanning set. */
@@ -18261,6 +18340,13 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
debug(printf("Test for completeset using better_free_end_exists_p: completeset5p %d, completeset3p %d\n",completeset5p,completeset3p));
}
+ if (querylength5 < min_kmer_readlength) {
+ completeset5p = false;
+ }
+ if (querylength3 < min_kmer_readlength) {
+ completeset3p = false;
+ }
+
if (completeset5p == true) {
debug(printf("Performing complete set analysis on 5' end\n"));
if (this5->read_oligos_p == false) {
@@ -19746,7 +19832,7 @@ realign_separately (Stage3end_T **stage3array5, int *nhits5, int *first_absmq5,
user_maxlevel_5,indel_penalty_middle,indel_penalty_end,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp5,keep_floors_p,genestrand,/*first_read_p*/true);
+ keep_floors_p,genestrand,/*first_read_p*/true);
}
if ((*nhits5 = List_length(singlehits5)) == 0) {
@@ -19774,7 +19860,7 @@ realign_separately (Stage3end_T **stage3array5, int *nhits5, int *first_absmq5,
user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp3,keep_floors_p,genestrand,/*first_read_p*/false);
+ keep_floors_p,genestrand,/*first_read_p*/false);
}
if ((*nhits3 = List_length(singlehits3)) == 0) {
@@ -20345,8 +20431,6 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
int user_maxlevel_5, user_maxlevel_3;
int found_score, cutoff_level_5, cutoff_level_3;
int querylength5, querylength3, query5_lastpos, query3_lastpos;
- int noligos5, noligos3;
- bool allvalidp5, allvalidp3;
#if 0
int maxpairedpaths = 10*maxpaths; /* For computation, not for printing. */
#else
@@ -20364,258 +20448,128 @@ paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T *final
querylength5 = Shortread_fulllength(queryseq5);
querylength3 = Shortread_fulllength(queryseq3);
-#ifdef HAVE_ALLOCA
- queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char));
- queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char));
-#endif
-
- if (querylength5 < min_readlength && querylength3 < min_readlength) {
- fprintf(stderr,"Paired-read %s has lengths %d and %d < min_readlength %d. Skipping.\n",
- Shortread_accession(queryseq5),querylength5,querylength3,min_readlength);
- /* fprintf(stderr,"You may want to build a genomic index with a smaller k-mer value using the -k flag to gmap_build\n"); */
- *npaths = *nhits5 = *nhits3 = 0;
- *stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
- return (Stage3pair_T *) NULL;
-
#ifndef HAVE_ALLOCA
- } else if (querylength5 > MAX_READLENGTH || querylength3 > MAX_READLENGTH) {
+ if (querylength5 > MAX_READLENGTH || querylength3 > MAX_READLENGTH) {
fprintf(stderr,"Paired-read %s has lengths %d and %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
Shortread_accession(queryseq5),querylength5,querylength3,MAX_READLENGTH);
*npaths = *nhits5 = *nhits3 = 0;
*stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
return (Stage3pair_T *) NULL;
+ }
+#else
+ queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char));
+ queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char));
#endif
- } else if (querylength5 < min_readlength) {
- /* Solve just 3' end */
- fprintf(stderr,"First end of paired-read %s has length %d < min_readlength %d. Aligning second end only.\n",
- Shortread_accession(queryseq5),querylength5,min_readlength);
- *nhits5 = 0;
- *stage3array5 = (Stage3end_T *) NULL;
-
- if (user_maxlevel_float < 0.0) {
- user_maxlevel_3 = -1;
- } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
- user_maxlevel_3 = (int) rint(user_maxlevel_float * (double) querylength3);
- } else {
- user_maxlevel_3 = (int) user_maxlevel_float;
- }
+ if (user_maxlevel_float < 0.0) {
+ user_maxlevel_5 = user_maxlevel_3 = -1;
+ } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
+ user_maxlevel_5 = (int) rint(user_maxlevel_float * (double) querylength5);
+ user_maxlevel_3 = (int) rint(user_maxlevel_float * (double) querylength3);
+ } else {
+ user_maxlevel_5 = user_maxlevel_3 = (int) user_maxlevel_float;
+ }
+
+ this5 = Stage1_new(querylength5);
+ this3 = Stage1_new(querylength3);
+ queryuc_ptr_5 = Shortread_fullpointer_uc(queryseq5);
+ queryuc_ptr_3 = Shortread_fullpointer_uc(queryseq3);
+ quality_string_5 = Shortread_quality_string(queryseq5);
+ quality_string_3 = Shortread_quality_string(queryseq3);
+ query5_lastpos = querylength5 - index1part;
+ query3_lastpos = querylength3 - index1part;
+
+ /* Limit search on repetitive sequences */
+ if (check_dinucleotides(queryuc_ptr_5,querylength5) == false) {
+ user_maxlevel_5 = 0;
+ }
+ if (check_dinucleotides(queryuc_ptr_3,querylength3) == false) {
+ user_maxlevel_3 = 0;
+ }
+
+ query5_compress_fwd = Compress_new_fwd(queryuc_ptr_5,querylength5);
+ query5_compress_rev = Compress_new_rev(queryuc_ptr_5,querylength5);
+ query3_compress_fwd = Compress_new_fwd(queryuc_ptr_3,querylength3);
+ query3_compress_rev = Compress_new_rev(queryuc_ptr_3,querylength3);
+ gmap_history_5 = History_new();
+ gmap_history_3 = History_new();
+ make_complement_buffered(queryrc5,queryuc_ptr_5,querylength5);
+ make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
+
+ hitpairs = align_pair(&abort_pairing_p,&found_score,&cutoff_level_5,&cutoff_level_3,
+ &samechr,&conc_transloc,gmap_history_5,gmap_history_3,
+ &hits5,&hits3,this5,this3,query5_compress_fwd,query5_compress_rev,
+ query3_compress_fwd,query3_compress_rev,
+ queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
+ querylength5,querylength3,query5_lastpos,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- this3 = Stage1_new(querylength3);
- queryuc_ptr_3 = Shortread_fullpointer_uc(queryseq3);
- quality_string_3 = Shortread_quality_string(queryseq3);
- query3_lastpos = querylength3 - index1part;
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- /* Limit search on repetitive sequences */
- if (check_dinucleotides(queryuc_ptr_3,querylength3) == false) {
- user_maxlevel_3 = 0;
- }
+ user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ pairmax,maxpairedpaths,keep_floors_p,queryseq5,queryseq3,/*genestrand*/0);
- query3_compress_fwd = Compress_new_fwd(queryuc_ptr_3,querylength3);
- query3_compress_rev = Compress_new_rev(queryuc_ptr_3,querylength3);
- gmap_history_3 = History_new();
- make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
+ if (abort_pairing_p == true) {
+ debug16(printf("abort_pairing_p is true\n"));
+ paired_results_free(this5,this3,hitpairs,samechr,conc_transloc,
+ hits5,hits3,querylength5,querylength3);
- hits3 = align_end(&cutoff_level_3,gmap_history_3,this3,
- query3_compress_fwd,query3_compress_rev,
- Shortread_accession(queryseq5),queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp3,keep_floors_p,/*genestrand*/0,/*first_read_p*/false);
+ this5 = Stage1_new(querylength5);
+ this3 = Stage1_new(querylength3);
+ realign_separately(stage3array5,&(*nhits5),&(*first_absmq5),&(*second_absmq5),
+ stage3array3,&(*nhits3),&(*first_absmq3),&(*second_absmq3),
+ gmap_history_5,gmap_history_3,this5,this3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+ user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ keep_floors_p,/*genestrand*/0);
- if ((*nhits3 = List_length(hits3)) == 0) {
- *stage3array3 = (Stage3end_T *) NULL;
- } else {
- *stage3array3 = (Stage3end_T *) List_to_array_out(hits3,NULL); List_free(&hits3); /* Return value */
- *stage3array3 = Stage3end_eval_and_sort(&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- *stage3array3,maxpaths_search,queryseq3,
- queryuc_ptr_3,queryrc3,
- query3_compress_fwd,query3_compress_rev,
- quality_string_3,/*displayp*/true);
- }
*npaths = 0;
*final_pairtype = UNPAIRED;
History_free(&gmap_history_3);
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
Compress_free(&query3_compress_fwd);
Compress_free(&query3_compress_rev);
+ Stage1_free(&this5,querylength5);
Stage1_free(&this3,querylength3);
return (Stage3pair_T *) NULL;
- } else if (querylength3 < min_readlength) {
- /* Solve just 5' end */
- fprintf(stderr,"Second end of paired-read %s has length %d < min_readlength %d. Aligning first end only.\n",
- Shortread_accession(queryseq5),querylength3,min_readlength);
- *nhits3 = 0;
- *stage3array3 = (Stage3end_T *) NULL;
-
- if (user_maxlevel_float < 0.0) {
- user_maxlevel_5 = -1;
- } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
- user_maxlevel_5 = (int) rint(user_maxlevel_float * (double) querylength5);
- } else {
- user_maxlevel_5 = (int) user_maxlevel_float;
- }
-
- this5 = Stage1_new(querylength5);
- queryuc_ptr_5 = Shortread_fullpointer_uc(queryseq5);
- quality_string_5 = Shortread_quality_string(queryseq5);
- query5_lastpos = querylength5 - index1part;
-
- /* Limit search on repetitive sequences */
- if (check_dinucleotides(queryuc_ptr_5,querylength5) == false) {
- user_maxlevel_5 = 0;
- }
-
- query5_compress_fwd = Compress_new_fwd(queryuc_ptr_5,querylength5);
- query5_compress_rev = Compress_new_rev(queryuc_ptr_5,querylength5);
- gmap_history_5 = History_new();
- make_complement_buffered(queryrc5,queryuc_ptr_5,querylength5);
-
- hits5 = align_end(&cutoff_level_5,gmap_history_5,this5,
- query5_compress_fwd,query5_compress_rev,
- Shortread_accession(queryseq5),queryuc_ptr_5,queryrc5,querylength5,query5_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_5,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp5,keep_floors_p,/*genestrand*/0,/*first_read_p*/true);
+ } else {
+ stage3pairarray =
+ consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
+ &(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
+ &(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
+ hitpairs,samechr,conc_transloc,hits5,hits3,gmap_history_5,gmap_history_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ &this5->plus_segments,&this5->plus_nsegments,&this5->minus_segments,&this5->minus_nsegments,
+ &this3->plus_segments,&this3->plus_nsegments,&this3->minus_segments,&this3->minus_nsegments,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ cutoff_level_5,cutoff_level_3,
+ localsplicing_penalty,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
- if ((*nhits5 = List_length(hits5)) == 0) {
- *stage3array5 = (Stage3end_T *) NULL;
- } else {
- *stage3array5 = (Stage3end_T *) List_to_array_out(hits5,NULL); List_free(&hits5); /* Return value */
- *stage3array5 = Stage3end_eval_and_sort(&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- *stage3array5,maxpaths_search,queryseq5,
- queryuc_ptr_5,queryrc5,
- query5_compress_fwd,query5_compress_rev,
- quality_string_5,/*displayp*/true);
- }
- *npaths = 0;
- *final_pairtype = UNPAIRED;
+ History_free(&gmap_history_3);
History_free(&gmap_history_5);
Compress_free(&query5_compress_fwd);
Compress_free(&query5_compress_rev);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
Stage1_free(&this5,querylength5);
- return (Stage3pair_T *) NULL;
-
- } else {
- if (user_maxlevel_float < 0.0) {
- user_maxlevel_5 = user_maxlevel_3 = -1;
- } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
- user_maxlevel_5 = (int) rint(user_maxlevel_float * (double) querylength5);
- user_maxlevel_3 = (int) rint(user_maxlevel_float * (double) querylength3);
- } else {
- user_maxlevel_5 = user_maxlevel_3 = (int) user_maxlevel_float;
- }
-
- this5 = Stage1_new(querylength5);
- this3 = Stage1_new(querylength3);
- queryuc_ptr_5 = Shortread_fullpointer_uc(queryseq5);
- queryuc_ptr_3 = Shortread_fullpointer_uc(queryseq3);
- quality_string_5 = Shortread_quality_string(queryseq5);
- quality_string_3 = Shortread_quality_string(queryseq3);
- query5_lastpos = querylength5 - index1part;
- query3_lastpos = querylength3 - index1part;
-
- /* Limit search on repetitive sequences */
- if (check_dinucleotides(queryuc_ptr_5,querylength5) == false) {
- user_maxlevel_5 = 0;
- }
- if (check_dinucleotides(queryuc_ptr_3,querylength3) == false) {
- user_maxlevel_3 = 0;
- }
-
- query5_compress_fwd = Compress_new_fwd(queryuc_ptr_5,querylength5);
- query5_compress_rev = Compress_new_rev(queryuc_ptr_5,querylength5);
- query3_compress_fwd = Compress_new_fwd(queryuc_ptr_3,querylength3);
- query3_compress_rev = Compress_new_rev(queryuc_ptr_3,querylength3);
- gmap_history_5 = History_new();
- gmap_history_3 = History_new();
- make_complement_buffered(queryrc5,queryuc_ptr_5,querylength5);
- make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
-
- hitpairs = align_pair(&abort_pairing_p,&found_score,&cutoff_level_5,&cutoff_level_3,
- &samechr,&conc_transloc,gmap_history_5,gmap_history_3,
- &hits5,&hits3,this5,this3,query5_compress_fwd,query5_compress_rev,
- query3_compress_fwd,query3_compress_rev,
- queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
- querylength5,querylength3,query5_lastpos,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
-
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
-
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp5,allvalidp3,pairmax,maxpairedpaths,keep_floors_p,
- queryseq5,queryseq3,/*genestrand*/0);
-
- if (abort_pairing_p == true) {
- debug16(printf("abort_pairing_p is true\n"));
- paired_results_free(this5,this3,hitpairs,samechr,conc_transloc,
- hits5,hits3,querylength5,querylength3);
-
- this5 = Stage1_new(querylength5);
- this3 = Stage1_new(querylength3);
- realign_separately(stage3array5,&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- stage3array3,&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- gmap_history_5,gmap_history_3,this5,this3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- keep_floors_p,/*genestrand*/0);
-
- *npaths = 0;
- *final_pairtype = UNPAIRED;
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this5,querylength5);
- Stage1_free(&this3,querylength3);
- return (Stage3pair_T *) NULL;
-
- } else {
- stage3pairarray =
- consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
- &(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- &(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- hitpairs,samechr,conc_transloc,hits5,hits3,gmap_history_5,gmap_history_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- &this5->plus_segments,&this5->plus_nsegments,&this5->minus_segments,&this5->minus_nsegments,
- &this3->plus_segments,&this3->plus_nsegments,&this3->minus_segments,&this3->minus_nsegments,
- queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- cutoff_level_5,cutoff_level_3,
- localsplicing_penalty,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
-
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this5,querylength5);
- Stage1_free(&this3,querylength3);
- return stage3pairarray;
- }
+ Stage1_free(&this3,querylength3);
+ return stage3pairarray;
}
}
@@ -20647,8 +20601,6 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
int found_score_geneplus, found_score_geneminus;
int cutoff_level_5, cutoff_level_3;
int querylength5, querylength3, query5_lastpos, query3_lastpos;
- int noligos5, noligos3;
- bool allvalidp5, allvalidp3;
#if 0
int maxpairedpaths = 10*maxpaths; /* For computation, not for printing. */
#else
@@ -20670,478 +20622,290 @@ paired_read_tolerant_nonstranded (int *npaths, int *first_absmq, int *second_abs
querylength5 = Shortread_fulllength(queryseq5);
querylength3 = Shortread_fulllength(queryseq3);
-#ifdef HAVE_ALLOCA
- queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char));
- queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char));
-#endif
-
- if (querylength5 < min_readlength && querylength3 < min_readlength) {
- fprintf(stderr,"Paired-read %s has lengths %d and %d < min_readlength %d. Skipping.\n",
- Shortread_accession(queryseq5),querylength5,querylength3,min_readlength);
- /* fprintf(stderr,"You may want to build a genomic index with a smaller k-mer value using the -k flag to gmap_build\n"); */
- *npaths = *nhits5 = *nhits3 = 0;
- *stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
- return (Stage3pair_T *) NULL;
-
#ifndef HAVE_ALLOCA
- } else if (querylength5 > MAX_READLENGTH || querylength3 > MAX_READLENGTH) {
+ if (querylength5 > MAX_READLENGTH || querylength3 > MAX_READLENGTH) {
fprintf(stderr,"Paired-read %s has lengths %d and %d > MAX_READLENGTH %d. Either run configure and make again with a higher value of MAX_READLENGTH, or consider using GMAP instead.\n",
Shortread_accession(queryseq5),querylength5,querylength3,MAX_READLENGTH);
*npaths = *nhits5 = *nhits3 = 0;
*stage3array5 = *stage3array3 = (Stage3end_T *) NULL;
return (Stage3pair_T *) NULL;
+ }
+#else
+ queryrc5 = (char *) ALLOCA((querylength5+1)*sizeof(char));
+ queryrc3 = (char *) ALLOCA((querylength3+1)*sizeof(char));
#endif
- } else if (querylength5 < min_readlength) {
- /* Solve just 3' end */
- fprintf(stderr,"First end of paired-read %s has length %d < min_readlength %d. Aligning second end only.\n",
- Shortread_accession(queryseq5),querylength5,min_readlength);
- *nhits5 = 0;
- *stage3array5 = (Stage3end_T *) NULL;
-
- if (user_maxlevel_float < 0.0) {
- user_maxlevel_3 = -1;
- } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
- user_maxlevel_3 = (int) rint(user_maxlevel_float * (double) querylength3);
- } else {
- user_maxlevel_3 = (int) user_maxlevel_float;
- }
-
- this_geneplus_3 = Stage1_new(querylength3);
- this_geneminus_3 = Stage1_new(querylength3);
-
- queryuc_ptr_3 = Shortread_fullpointer_uc(queryseq3);
- quality_string_3 = Shortread_quality_string(queryseq3);
- query3_lastpos = querylength3 - index1part;
-
- /* Limit search on repetitive sequences */
- if (check_dinucleotides(queryuc_ptr_3,querylength3) == false) {
- user_maxlevel_3 = 0;
- }
-
- query3_compress_fwd = Compress_new_fwd(queryuc_ptr_3,querylength3);
- query3_compress_rev = Compress_new_rev(queryuc_ptr_3,querylength3);
- gmap_history_3 = History_new();
- make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
-
- if (read_oligos(&allvalidp3,this_geneplus_3,queryuc_ptr_3,querylength3,query3_lastpos,/*genestrand*/+1,
- /*first_read_p*/false) == 0) {
- hits_geneplus_3 = (List_T) NULL;
- } else {
- hits_geneplus_3 = align_end(&cutoff_level_3,gmap_history_3,this_geneplus_3,
- query3_compress_fwd,query3_compress_rev,
- Shortread_accession(queryseq5),queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_fwd,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
+ if (user_maxlevel_float < 0.0) {
+ user_maxlevel_5 = user_maxlevel_3 = -1;
+ } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
+ user_maxlevel_5 = (int) rint(user_maxlevel_float * (double) querylength5);
+ user_maxlevel_3 = (int) rint(user_maxlevel_float * (double) querylength3);
+ } else {
+ user_maxlevel_5 = user_maxlevel_3 = (int) user_maxlevel_float;
+ }
+
+ this_geneplus_5 = Stage1_new(querylength5);
+ this_geneplus_3 = Stage1_new(querylength3);
+ this_geneminus_5 = Stage1_new(querylength5);
+ this_geneminus_3 = Stage1_new(querylength3);
+
+ queryuc_ptr_5 = Shortread_fullpointer_uc(queryseq5);
+ queryuc_ptr_3 = Shortread_fullpointer_uc(queryseq3);
+ quality_string_5 = Shortread_quality_string(queryseq5);
+ quality_string_3 = Shortread_quality_string(queryseq3);
+ query5_lastpos = querylength5 - index1part;
+ query3_lastpos = querylength3 - index1part;
+
+ /* Limit search on repetitive sequences */
+ if (check_dinucleotides(queryuc_ptr_5,querylength5) == false) {
+ user_maxlevel_5 = 0;
+ }
+ if (check_dinucleotides(queryuc_ptr_3,querylength3) == false) {
+ user_maxlevel_3 = 0;
+ }
+
+ query5_compress_fwd = Compress_new_fwd(queryuc_ptr_5,querylength5);
+ query5_compress_rev = Compress_new_rev(queryuc_ptr_5,querylength5);
+ query3_compress_fwd = Compress_new_fwd(queryuc_ptr_3,querylength3);
+ query3_compress_rev = Compress_new_rev(queryuc_ptr_3,querylength3);
+ gmap_history_5 = History_new();
+ gmap_history_3 = History_new();
+ make_complement_buffered(queryrc5,queryuc_ptr_5,querylength5);
+ make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
+
+ abort_pairing_p_geneplus = false;
+ hitpairs_geneplus = align_pair(&abort_pairing_p_geneplus,&found_score_geneplus,
+ &cutoff_level_5,&cutoff_level_3,
+ &samechr_geneplus,&conc_transloc_geneplus,
+ gmap_history_5,gmap_history_3,
+ &hits_geneplus_5,&hits_geneplus_3,this_geneplus_5,this_geneplus_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
+ querylength5,querylength3,query5_lastpos,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+
+ user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ pairmax,maxpairedpaths,keep_floors_p,
+ queryseq5,queryseq3,/*genestrand*/+1);
+
+ abort_pairing_p_geneminus = false;
+ hitpairs_geneminus = align_pair(&abort_pairing_p_geneminus,&found_score_geneminus,
+ &cutoff_level_5,&cutoff_level_3,
+ &samechr_geneminus,&conc_transloc_geneminus,
+ gmap_history_5,gmap_history_3,
+ &hits_geneminus_5,&hits_geneminus_3,this_geneminus_5,this_geneminus_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
+ querylength5,querylength3,query5_lastpos,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+
+ oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+
+ user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
localsplicing_penalty,distantsplicing_penalty,min_shortend,
allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp3,keep_floors_p,/*genestrand*/+1,/*first_read_p*/false);
- }
+ pairmax,maxpairedpaths,keep_floors_p,queryseq5,queryseq3,/*genestrand*/+2);
- if (read_oligos(&allvalidp3,this_geneminus_3,queryuc_ptr_3,querylength3,query3_lastpos,/*genestrand*/+2,
- /*first_read_p*/false) == 0) {
- hits_geneminus_3 = (List_T) NULL;
- } else {
- hits_geneminus_3 = align_end(&cutoff_level_3,gmap_history_3,this_geneminus_3,
- query3_compress_fwd,query3_compress_rev,
- Shortread_accession(queryseq5),queryuc_ptr_3,queryrc3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp3,keep_floors_p,/*genestrand*/+2,/*first_read_p*/false);
- }
+ if (found_score_geneplus < found_score_geneminus) {
+ paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
+ hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
- hits3 = List_append(hits_geneplus_3,hits_geneminus_3);
- if ((*nhits3 = List_length(hits3)) == 0) {
- *stage3array3 = (Stage3end_T *) NULL;
- } else {
- *stage3array3 = (Stage3end_T *) List_to_array_out(hits3,NULL); List_free(&hits3); /* Return value */
- *stage3array3 = Stage3end_eval_and_sort(&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- *stage3array3,maxpaths_search,queryseq3,
- queryuc_ptr_3,queryrc3,
- query3_compress_fwd,query3_compress_rev,
- quality_string_3,/*displayp*/true);
- }
+ if (abort_pairing_p_geneplus == true) {
+ debug16(printf("abort_pairing_p_geneplus is true\n"));
+ paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
+ hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
+
+ this_geneplus_5 = Stage1_new(querylength5);
+ this_geneplus_3 = Stage1_new(querylength3);
+ realign_separately(stage3array5,&(*nhits5),&(*first_absmq5),&(*second_absmq5),
+ stage3array3,&(*nhits3),&(*first_absmq3),&(*second_absmq3),
+ gmap_history_5,gmap_history_3,this_geneplus_5,this_geneplus_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+ user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ keep_floors_p,/*genestrand*/+1);
*npaths = 0;
*final_pairtype = UNPAIRED;
History_free(&gmap_history_3);
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
Compress_free(&query3_compress_fwd);
Compress_free(&query3_compress_rev);
- Stage1_free(&this_geneminus_3,querylength3);
+ Stage1_free(&this_geneplus_5,querylength5);
Stage1_free(&this_geneplus_3,querylength3);
return (Stage3pair_T *) NULL;
- } else if (querylength3 < min_readlength) {
- /* Solve just 5' end */
- fprintf(stderr,"Second end of paired-read %s has length %d < min_readlength %d. Aligning first end only.\n",
- Shortread_accession(queryseq5),querylength3,min_readlength);
- *nhits3 = 0;
- *stage3array3 = (Stage3end_T *) NULL;
-
- if (user_maxlevel_float < 0.0) {
- user_maxlevel_5 = -1;
- } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
- user_maxlevel_5 = (int) rint(user_maxlevel_float * (double) querylength5);
- } else {
- user_maxlevel_5 = (int) user_maxlevel_float;
- }
-
- this_geneplus_5 = Stage1_new(querylength5);
- this_geneminus_5 = Stage1_new(querylength5);
-
- queryuc_ptr_5 = Shortread_fullpointer_uc(queryseq5);
- quality_string_5 = Shortread_quality_string(queryseq5);
- query5_lastpos = querylength5 - index1part;
-
- /* Limit search on repetitive sequences */
- if (check_dinucleotides(queryuc_ptr_5,querylength5) == false) {
- user_maxlevel_5 = 0;
- }
-
- query5_compress_fwd = Compress_new_fwd(queryuc_ptr_5,querylength5);
- query5_compress_rev = Compress_new_rev(queryuc_ptr_5,querylength5);
- gmap_history_5 = History_new();
- make_complement_buffered(queryrc5,queryuc_ptr_5,querylength5);
+ } else {
+ plus_segments_genestrand_5[+1] = this_geneplus_5->plus_segments;
+ plus_nsegments_genestrand_5[+1] = this_geneplus_5->plus_nsegments;
+ minus_segments_genestrand_5[+1] = this_geneplus_5->minus_segments;
+ minus_nsegments_genestrand_5[+1] = this_geneplus_5->minus_nsegments;
+
+ plus_segments_genestrand_3[+1] = this_geneplus_3->plus_segments;
+ plus_nsegments_genestrand_3[+1] = this_geneplus_3->plus_nsegments;
+ minus_segments_genestrand_3[+1] = this_geneplus_3->minus_segments;
+ minus_nsegments_genestrand_3[+1] = this_geneplus_3->minus_nsegments;
+
+ stage3pairarray =
+ consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
+ &(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
+ &(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
+ hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
+ hits_geneplus_5,hits_geneplus_3,gmap_history_5,gmap_history_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ plus_segments_genestrand_5,plus_nsegments_genestrand_5,minus_segments_genestrand_5,minus_nsegments_genestrand_5,
+ plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ cutoff_level_5,cutoff_level_3,localsplicing_penalty,oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
+ History_free(&gmap_history_3);
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
+ Stage1_free(&this_geneplus_5,querylength5);
+ Stage1_free(&this_geneplus_3,querylength3);
+ return stage3pairarray;
+ }
- if (read_oligos(&allvalidp5,this_geneplus_5,queryuc_ptr_5,querylength5,query5_lastpos,/*genestrand*/+1,
- /*first_read_p*/true) == 0) {
- hits_geneplus_5 = (List_T) NULL;
- } else {
- hits_geneplus_5 = align_end(&cutoff_level_5,gmap_history_5,this_geneplus_5,
- query5_compress_fwd,query5_compress_rev,
- Shortread_accession(queryseq5),queryuc_ptr_5,queryrc5,querylength5,query5_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_5,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp5,keep_floors_p,/*genestrand*/+1,/*first_read_p*/true);
- }
+ } else if (found_score_geneminus < found_score_geneplus) {
+ paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
+ hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
- if (read_oligos(&allvalidp5,this_geneminus_5,queryuc_ptr_5,querylength5,query5_lastpos,/*genestrand*/+2,
- /*first_read_p*/true) == 0) {
- hits_geneminus_5 = (List_T) NULL;
- } else {
- hits_geneminus_5 = align_end(&cutoff_level_5,gmap_history_5,this_geneminus_5,
- query5_compress_fwd,query5_compress_rev,
- Shortread_accession(queryseq5),queryuc_ptr_5,queryrc5,querylength5,query5_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,
- floors_array,oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- user_maxlevel_5,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp5,keep_floors_p,/*genestrand*/+2,/*first_read_p*/true);
- }
+ if (abort_pairing_p_geneminus == true) {
+ debug16(printf("abort_pairing_p_geneminus is true\n"));
+ paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
+ hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
- hits5 = List_append(hits_geneplus_5,hits_geneminus_5);
- if ((*nhits5 = List_length(hits5)) == 0) {
- *stage3array5 = (Stage3end_T *) NULL;
- } else {
- *stage3array5 = (Stage3end_T *) List_to_array_out(hits5,NULL); List_free(&hits5); /* Return value */
- *stage3array5 = Stage3end_eval_and_sort(&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- *stage3array5,maxpaths_search,queryseq5,
- queryuc_ptr_5,queryrc5,
- query5_compress_fwd,query5_compress_rev,
- quality_string_5,/*displayp*/true);
- }
+ this_geneminus_5 = Stage1_new(querylength5);
+ this_geneminus_3 = Stage1_new(querylength3);
+ realign_separately(stage3array5,&(*nhits5),&(*first_absmq5),&(*second_absmq5),
+ stage3array3,&(*nhits3),&(*first_absmq3),&(*second_absmq3),
+ gmap_history_5,gmap_history_3,this_geneminus_5,this_geneminus_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
+ user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
+ allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
+ localsplicing_penalty,distantsplicing_penalty,min_shortend,
+ oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+ keep_floors_p,/*genestrand*/+2);
*npaths = 0;
*final_pairtype = UNPAIRED;
+ History_free(&gmap_history_3);
History_free(&gmap_history_5);
Compress_free(&query5_compress_fwd);
Compress_free(&query5_compress_rev);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
Stage1_free(&this_geneminus_5,querylength5);
- Stage1_free(&this_geneplus_5,querylength5);
+ Stage1_free(&this_geneminus_3,querylength3);
return (Stage3pair_T *) NULL;
} else {
- if (user_maxlevel_float < 0.0) {
- user_maxlevel_5 = user_maxlevel_3 = -1;
- } else if (user_maxlevel_float > 0.0 && user_maxlevel_float < 1.0) {
- user_maxlevel_5 = (int) rint(user_maxlevel_float * (double) querylength5);
- user_maxlevel_3 = (int) rint(user_maxlevel_float * (double) querylength3);
- } else {
- user_maxlevel_5 = user_maxlevel_3 = (int) user_maxlevel_float;
- }
-
- this_geneplus_5 = Stage1_new(querylength5);
- this_geneplus_3 = Stage1_new(querylength3);
- this_geneminus_5 = Stage1_new(querylength5);
- this_geneminus_3 = Stage1_new(querylength3);
-
- queryuc_ptr_5 = Shortread_fullpointer_uc(queryseq5);
- queryuc_ptr_3 = Shortread_fullpointer_uc(queryseq3);
- quality_string_5 = Shortread_quality_string(queryseq5);
- quality_string_3 = Shortread_quality_string(queryseq3);
- query5_lastpos = querylength5 - index1part;
- query3_lastpos = querylength3 - index1part;
-
- /* Limit search on repetitive sequences */
- if (check_dinucleotides(queryuc_ptr_5,querylength5) == false) {
- user_maxlevel_5 = 0;
- }
- if (check_dinucleotides(queryuc_ptr_3,querylength3) == false) {
- user_maxlevel_3 = 0;
- }
-
- query5_compress_fwd = Compress_new_fwd(queryuc_ptr_5,querylength5);
- query5_compress_rev = Compress_new_rev(queryuc_ptr_5,querylength5);
- query3_compress_fwd = Compress_new_fwd(queryuc_ptr_3,querylength3);
- query3_compress_rev = Compress_new_rev(queryuc_ptr_3,querylength3);
- gmap_history_5 = History_new();
- gmap_history_3 = History_new();
- make_complement_buffered(queryrc5,queryuc_ptr_5,querylength5);
- make_complement_buffered(queryrc3,queryuc_ptr_3,querylength3);
-
- abort_pairing_p_geneplus = false;
- hitpairs_geneplus = align_pair(&abort_pairing_p_geneplus,&found_score_geneplus,
- &cutoff_level_5,&cutoff_level_3,
- &samechr_geneplus,&conc_transloc_geneplus,
- gmap_history_5,gmap_history_3,
- &hits_geneplus_5,&hits_geneplus_3,this_geneplus_5,this_geneplus_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
- querylength5,querylength3,query5_lastpos,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
-
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
-
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp5,allvalidp3,pairmax,maxpairedpaths,keep_floors_p,
- queryseq5,queryseq3,/*genestrand*/+1);
-
- abort_pairing_p_geneminus = false;
- hitpairs_geneminus = align_pair(&abort_pairing_p_geneminus,&found_score_geneminus,
- &cutoff_level_5,&cutoff_level_3,
- &samechr_geneminus,&conc_transloc_geneminus,
- gmap_history_5,gmap_history_3,
- &hits_geneminus_5,&hits_geneminus_3,this_geneminus_5,this_geneminus_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryuc_ptr_5,queryuc_ptr_3,queryrc5,queryrc3,
- querylength5,querylength3,query5_lastpos,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
-
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
-
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- allvalidp5,allvalidp3,pairmax,maxpairedpaths,keep_floors_p,
- queryseq5,queryseq3,/*genestrand*/+2);
-
- if (found_score_geneplus < found_score_geneminus) {
- paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
- hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
-
- if (abort_pairing_p_geneplus == true) {
- debug16(printf("abort_pairing_p_geneplus is true\n"));
- paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
- hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
-
- this_geneplus_5 = Stage1_new(querylength5);
- this_geneplus_3 = Stage1_new(querylength3);
- realign_separately(stage3array5,&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- stage3array3,&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- gmap_history_5,gmap_history_3,this_geneplus_5,this_geneplus_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- keep_floors_p,/*genestrand*/+1);
-
- *npaths = 0;
- *final_pairtype = UNPAIRED;
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this_geneplus_5,querylength5);
- Stage1_free(&this_geneplus_3,querylength3);
- return (Stage3pair_T *) NULL;
-
- } else {
- plus_segments_genestrand_5[+1] = this_geneplus_5->plus_segments;
- plus_nsegments_genestrand_5[+1] = this_geneplus_5->plus_nsegments;
- minus_segments_genestrand_5[+1] = this_geneplus_5->minus_segments;
- minus_nsegments_genestrand_5[+1] = this_geneplus_5->minus_nsegments;
-
- plus_segments_genestrand_3[+1] = this_geneplus_3->plus_segments;
- plus_nsegments_genestrand_3[+1] = this_geneplus_3->plus_nsegments;
- minus_segments_genestrand_3[+1] = this_geneplus_3->minus_segments;
- minus_nsegments_genestrand_3[+1] = this_geneplus_3->minus_nsegments;
-
- stage3pairarray =
- consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
- &(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- &(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
- hits_geneplus_5,hits_geneplus_3,gmap_history_5,gmap_history_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- plus_segments_genestrand_5,plus_nsegments_genestrand_5,minus_segments_genestrand_5,minus_nsegments_genestrand_5,
- plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
- queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- cutoff_level_5,cutoff_level_3,
- localsplicing_penalty,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this_geneplus_5,querylength5);
- Stage1_free(&this_geneplus_3,querylength3);
- return stage3pairarray;
- }
-
- } else if (found_score_geneminus < found_score_geneplus) {
- paired_results_free(this_geneplus_5,this_geneplus_3,hitpairs_geneplus,samechr_geneplus,conc_transloc_geneplus,
- hits_geneplus_5,hits_geneplus_3,querylength5,querylength3);
-
- if (abort_pairing_p_geneminus == true) {
- debug16(printf("abort_pairing_p_geneminus is true\n"));
- paired_results_free(this_geneminus_5,this_geneminus_3,hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
- hits_geneminus_5,hits_geneminus_3,querylength5,querylength3);
-
- this_geneminus_5 = Stage1_new(querylength5);
- this_geneminus_3 = Stage1_new(querylength3);
- realign_separately(stage3array5,&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- stage3array3,&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- gmap_history_5,gmap_history_3,this_geneminus_5,this_geneminus_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array,
- user_maxlevel_5,user_maxlevel_3,indel_penalty_middle,indel_penalty_end,
- allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
- localsplicing_penalty,distantsplicing_penalty,min_shortend,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
- keep_floors_p,/*genestrand*/+2);
-
- *npaths = 0;
- *final_pairtype = UNPAIRED;
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this_geneminus_5,querylength5);
- Stage1_free(&this_geneminus_3,querylength3);
- return (Stage3pair_T *) NULL;
+ plus_segments_genestrand_5[+2] = this_geneminus_5->plus_segments;
+ plus_nsegments_genestrand_5[+2] = this_geneminus_5->plus_nsegments;
+ minus_segments_genestrand_5[+2] = this_geneminus_5->minus_segments;
+ minus_nsegments_genestrand_5[+2] = this_geneminus_5->minus_nsegments;
+
+ plus_segments_genestrand_3[+2] = this_geneminus_3->plus_segments;
+ plus_nsegments_genestrand_3[+2] = this_geneminus_3->plus_nsegments;
+ minus_segments_genestrand_3[+2] = this_geneminus_3->minus_segments;
+ minus_nsegments_genestrand_3[+2] = this_geneminus_3->minus_nsegments;
+
+ stage3pairarray =
+ consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
+ &(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
+ &(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
+ hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
+ hits_geneminus_5,hits_geneminus_3,gmap_history_5,gmap_history_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ plus_segments_genestrand_5,plus_nsegments_genestrand_5,minus_segments_genestrand_5,minus_nsegments_genestrand_5,
+ plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ cutoff_level_5,cutoff_level_3,localsplicing_penalty,oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
+ History_free(&gmap_history_3);
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
+ Stage1_free(&this_geneminus_5,querylength5);
+ Stage1_free(&this_geneminus_3,querylength3);
+ return stage3pairarray;
+ }
- } else {
- plus_segments_genestrand_5[+2] = this_geneminus_5->plus_segments;
- plus_nsegments_genestrand_5[+2] = this_geneminus_5->plus_nsegments;
- minus_segments_genestrand_5[+2] = this_geneminus_5->minus_segments;
- minus_nsegments_genestrand_5[+2] = this_geneminus_5->minus_nsegments;
-
- plus_segments_genestrand_3[+2] = this_geneminus_3->plus_segments;
- plus_nsegments_genestrand_3[+2] = this_geneminus_3->plus_nsegments;
- minus_segments_genestrand_3[+2] = this_geneminus_3->minus_segments;
- minus_nsegments_genestrand_3[+2] = this_geneminus_3->minus_nsegments;
-
- stage3pairarray =
- consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
- &(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- &(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- hitpairs_geneminus,samechr_geneminus,conc_transloc_geneminus,
- hits_geneminus_5,hits_geneminus_3,gmap_history_5,gmap_history_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- plus_segments_genestrand_5,plus_nsegments_genestrand_5,minus_segments_genestrand_5,minus_nsegments_genestrand_5,
- plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
- queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- cutoff_level_5,cutoff_level_3,
- localsplicing_penalty,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this_geneminus_5,querylength5);
- Stage1_free(&this_geneminus_3,querylength3);
- return stage3pairarray;
- }
+ } else {
+ hitpairs = List_append(hitpairs_geneplus,hitpairs_geneminus);
+ samechr = List_append(samechr_geneplus,samechr_geneminus);
+ conc_transloc = List_append(conc_transloc_geneplus,conc_transloc_geneminus);
+ hits5 = List_append(hits_geneplus_5,hits_geneminus_5);
+ hits3 = List_append(hits_geneplus_3,hits_geneminus_3);
- } else {
- hitpairs = List_append(hitpairs_geneplus,hitpairs_geneminus);
- samechr = List_append(samechr_geneplus,samechr_geneminus);
- conc_transloc = List_append(conc_transloc_geneplus,conc_transloc_geneminus);
- hits5 = List_append(hits_geneplus_5,hits_geneminus_5);
- hits3 = List_append(hits_geneplus_3,hits_geneminus_3);
-
- plus_segments_genestrand_5[+1] = this_geneplus_5->plus_segments;
- plus_nsegments_genestrand_5[+1] = this_geneplus_5->plus_nsegments;
- minus_segments_genestrand_5[+1] = this_geneplus_5->minus_segments;
- minus_nsegments_genestrand_5[+1] = this_geneplus_5->minus_nsegments;
-
- plus_segments_genestrand_3[+1] = this_geneplus_3->plus_segments;
- plus_nsegments_genestrand_3[+1] = this_geneplus_3->plus_nsegments;
- minus_segments_genestrand_3[+1] = this_geneplus_3->minus_segments;
- minus_nsegments_genestrand_3[+1] = this_geneplus_3->minus_nsegments;
-
- plus_segments_genestrand_5[+2] = this_geneminus_5->plus_segments;
- plus_nsegments_genestrand_5[+2] = this_geneminus_5->plus_nsegments;
- minus_segments_genestrand_5[+2] = this_geneminus_5->minus_segments;
- minus_nsegments_genestrand_5[+2] = this_geneminus_5->minus_nsegments;
-
- plus_segments_genestrand_3[+2] = this_geneminus_3->plus_segments;
- plus_nsegments_genestrand_3[+2] = this_geneminus_3->plus_nsegments;
- minus_segments_genestrand_3[+2] = this_geneminus_3->minus_segments;
- minus_nsegments_genestrand_3[+2] = this_geneminus_3->minus_nsegments;
-
- stage3pairarray =
- consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
- &(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
- &(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
- hitpairs,samechr,conc_transloc,hits5,hits3,gmap_history_5,gmap_history_3,
- query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
- plus_segments_genestrand_5,plus_nsegments_genestrand_5,minus_segments_genestrand_5,minus_nsegments_genestrand_5,
- plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
- queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
- queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
- cutoff_level_5,cutoff_level_3,
- localsplicing_penalty,
- oligoindices_major,oligoindices_minor,
- pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
- History_free(&gmap_history_3);
- History_free(&gmap_history_5);
- Compress_free(&query5_compress_fwd);
- Compress_free(&query5_compress_rev);
- Compress_free(&query3_compress_fwd);
- Compress_free(&query3_compress_rev);
- Stage1_free(&this_geneminus_5,querylength5);
- Stage1_free(&this_geneminus_3,querylength3);
- Stage1_free(&this_geneplus_5,querylength5);
- Stage1_free(&this_geneplus_3,querylength3);
- return stage3pairarray;
- }
+ plus_segments_genestrand_5[+1] = this_geneplus_5->plus_segments;
+ plus_nsegments_genestrand_5[+1] = this_geneplus_5->plus_nsegments;
+ minus_segments_genestrand_5[+1] = this_geneplus_5->minus_segments;
+ minus_nsegments_genestrand_5[+1] = this_geneplus_5->minus_nsegments;
+
+ plus_segments_genestrand_3[+1] = this_geneplus_3->plus_segments;
+ plus_nsegments_genestrand_3[+1] = this_geneplus_3->plus_nsegments;
+ minus_segments_genestrand_3[+1] = this_geneplus_3->minus_segments;
+ minus_nsegments_genestrand_3[+1] = this_geneplus_3->minus_nsegments;
+
+ plus_segments_genestrand_5[+2] = this_geneminus_5->plus_segments;
+ plus_nsegments_genestrand_5[+2] = this_geneminus_5->plus_nsegments;
+ minus_segments_genestrand_5[+2] = this_geneminus_5->minus_segments;
+ minus_nsegments_genestrand_5[+2] = this_geneminus_5->minus_nsegments;
+
+ plus_segments_genestrand_3[+2] = this_geneminus_3->plus_segments;
+ plus_nsegments_genestrand_3[+2] = this_geneminus_3->plus_nsegments;
+ minus_segments_genestrand_3[+2] = this_geneminus_3->minus_segments;
+ minus_nsegments_genestrand_3[+2] = this_geneminus_3->minus_nsegments;
+
+ stage3pairarray =
+ consolidate_paired_results(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
+ &(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
+ &(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
+ hitpairs,samechr,conc_transloc,hits5,hits3,gmap_history_5,gmap_history_3,
+ query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
+ plus_segments_genestrand_5,plus_nsegments_genestrand_5,minus_segments_genestrand_5,minus_nsegments_genestrand_5,
+ plus_segments_genestrand_3,plus_nsegments_genestrand_3,minus_segments_genestrand_3,minus_nsegments_genestrand_3,
+ queryseq5,queryuc_ptr_5,queryrc5,quality_string_5,querylength5,query5_lastpos,
+ queryseq3,queryuc_ptr_3,queryrc3,quality_string_3,querylength3,query3_lastpos,
+ cutoff_level_5,cutoff_level_3,localsplicing_penalty,oligoindices_major,oligoindices_minor,
+ pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,user_maxlevel_5,user_maxlevel_3);
+ History_free(&gmap_history_3);
+ History_free(&gmap_history_5);
+ Compress_free(&query5_compress_fwd);
+ Compress_free(&query5_compress_rev);
+ Compress_free(&query3_compress_fwd);
+ Compress_free(&query3_compress_rev);
+ Stage1_free(&this_geneminus_5,querylength5);
+ Stage1_free(&this_geneminus_3,querylength3);
+ Stage1_free(&this_geneplus_5,querylength5);
+ Stage1_free(&this_geneplus_3,querylength3);
+ return stage3pairarray;
}
}
@@ -21161,7 +20925,7 @@ Stage1_paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T
Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
Chrpos_T pairmax, bool keep_floors_p) {
- if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED) {
+ if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED || mode == TTOC_STRANDED) {
return paired_read(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
&(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
@@ -21172,7 +20936,7 @@ Stage1_paired_read (int *npaths, int *first_absmq, int *second_absmq, Pairtype_T
oligoindices_major,oligoindices_minor,
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,pairmax,keep_floors_p);
- } else if (mode == CMET_NONSTRANDED || mode == ATOI_NONSTRANDED) {
+ } else if (mode == CMET_NONSTRANDED || mode == ATOI_NONSTRANDED || mode == TTOC_NONSTRANDED) {
return paired_read_tolerant_nonstranded(&(*npaths),&(*first_absmq),&(*second_absmq),&(*final_pairtype),
&(*stage3array5),&(*nhits5),&(*first_absmq5),&(*second_absmq5),
&(*stage3array3),&(*nhits3),&(*first_absmq3),&(*second_absmq3),
@@ -21230,7 +20994,7 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
two_index1intervals = index1interval_in + index1interval_in;
spansize = spansize_in;
- min_readlength = index1part_in + index1interval_in - 1;
+ min_kmer_readlength = index1part_in + index1interval_in - 1;
chromosome_iit = chromosome_iit_in;
circular_typeint = Univ_IIT_typeint(chromosome_iit,"circular");
nchromosomes = nchromosomes_in;
diff --git a/src/stage3hr.c b/src/stage3hr.c
index b3ee14d..9dd4189 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 167162 2015-06-09 20:53:13Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 167393 2015-06-11 22:16:20Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -4802,9 +4802,15 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
}
nmismatches_whole += nmismatches;
debug0(printf("nmismatches %d from sarray\n",nmismatches));
+#ifdef LARGE_GENOMES
+ if (Uint8list_next(q) == NULL && right_ambig == NULL) {
+ trim_right_p = true;
+ }
+#else
if (Uintlist_next(q) == NULL && right_ambig == NULL) {
trim_right_p = true;
}
+#endif
if ((substring = Substring_new(/*nmismatches_whole*/nmismatches,chrnum,chroffset,chrhigh,chrlength,
query_compress,/*start_endtype*/END,/*end_endtype*/END,
querystart,queryend,querylength,alignstart,alignend,
@@ -4932,9 +4938,15 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
}
nmismatches_whole += nmismatches;
debug0(printf("nmismatches %d from sarray\n",nmismatches));
+#ifdef LARGE_GENOMES
+ if (Uint8list_next(q) == NULL && right_ambig == NULL) {
+ trim_left_p = true;
+ }
+#else
if (Uintlist_next(q) == NULL && right_ambig == NULL) {
trim_left_p = true;
}
+#endif
if ((substring = Substring_new(/*nmismatches_whole*/nmismatches,chrnum,chroffset,chrhigh,chrlength,
query_compress,/*start_endtype*/END,/*end_endtype*/END,
/*querystart*/querylength - queryend,/*queryend*/querylength - querystart,querylength,
diff --git a/src/substring.c b/src/substring.c
index 4d72940..a6043df 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 166827 2015-06-03 06:55:46Z twu $";
+static char rcsid[] = "$Id: substring.c 167592 2015-06-15 18:56:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1466,6 +1466,95 @@ mark_mismatches_atoi_sam (char *gbuffer, char *query, int start, int end, int ge
}
+static void
+mark_mismatches_ttoc_gsnap (char *gbuffer, char *query, int start, int end, int genestrand) {
+ int i;
+ /* Annotates gbuffer[start..end-1] in place by comparing it with query at the same indices. */
+ debug1(printf("query: %s\n",query));
+ debug1(printf("genome: %s\n",gbuffer));
+ debug1(printf("count: "));
+ /* genestrand +2: genome 'A' against query 'G' becomes '.'; any other difference is lowercased. */
+ if (genestrand == +2) {
+ for (i = start; i < end; i++) {
+ if (gbuffer[i] == 'A' && query[i] == 'G') {
+ debug1(printf("."));
+ gbuffer[i] = '.';
+ } else if (query[i] != gbuffer[i]) {
+ debug1(printf("x"));
+ assert(gbuffer[i] != OUTOFBOUNDS);
+ gbuffer[i] = (char) tolower(gbuffer[i]);
+ } else {
+ debug1(printf("*"));
+ }
+ }
+ /* Any other genestrand value: the same marking, but for genome 'T' against query 'C'. */
+ } else {
+ for (i = start; i < end; i++) {
+ if (gbuffer[i] == 'T' && query[i] == 'C') {
+ debug1(printf("."));
+ gbuffer[i] = '.';
+ } else if (query[i] != gbuffer[i]) {
+ debug1(printf("x"));
+ assert(gbuffer[i] != OUTOFBOUNDS);
+ gbuffer[i] = (char) tolower(gbuffer[i]);
+ } else {
+ debug1(printf("*"));
+ }
+ }
+ }
+ /* Positions where query and gbuffer agree are left unchanged. */
+ return;
+}
+
+
+
+static void
+mark_mismatches_ttoc_sam (char *gbuffer, char *query, int start, int end, int genestrand) {
+ int i;
+ /* Compares gbuffer[start..end-1] with query; lowercases gbuffer in place where they differ, except for the pair noted below. */
+ debug1(printf("query: %s\n",query));
+ debug1(printf("genome: %s\n",gbuffer));
+ debug1(printf("count: "));
+ /* genestrand +2: genome 'A' against query 'G' is left as is, because the substitution is disabled by #if 0. */
+ if (genestrand == +2) {
+ for (i = start; i < end; i++) {
+ if (gbuffer[i] == 'A' && query[i] == 'G') {
+ debug1(printf("."));
+#if 0
+ /* Want to show mismatches */
+ gbuffer[i] = 'G'; /* Avoids showing mismatches in MD and NM strings */
+#endif
+ } else if (query[i] != gbuffer[i]) {
+ debug1(printf("x"));
+ assert(gbuffer[i] != OUTOFBOUNDS);
+ gbuffer[i] = (char) tolower(gbuffer[i]);
+ } else {
+ debug1(printf("*"));
+ }
+ }
+ /* Any other genestrand value: genome 'T' against query 'C' is the pair left as is. */
+ } else {
+ for (i = start; i < end; i++) {
+ if (gbuffer[i] == 'T' && query[i] == 'C') {
+ debug1(printf("."));
+#if 0
+ /* Want to show mismatches */
+ gbuffer[i] = 'C'; /* Avoids showing mismatches in MD and NM strings */
+#endif
+ } else if (query[i] != gbuffer[i]) {
+ debug1(printf("x"));
+ assert(gbuffer[i] != OUTOFBOUNDS);
+ gbuffer[i] = (char) tolower(gbuffer[i]);
+ } else {
+ debug1(printf("*"));
+ }
+ }
+ }
+ /* Matching positions are not modified. */
+ return;
+}
+
+
void
Substring_setup (bool print_nsnpdiffs_p_in, bool print_snplabels_p_in,
@@ -1539,6 +1628,8 @@ embellish_genomic (char *genomic_diff, char *query, int querystart, int queryend
mark_mismatches_cmet_gsnap(result,query,querystart,queryend,genestrand);
} else if (mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
mark_mismatches_atoi_gsnap(result,query,querystart,queryend,genestrand);
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ mark_mismatches_ttoc_gsnap(result,query,querystart,queryend,genestrand);
} else {
abort();
}
@@ -1583,6 +1674,8 @@ embellish_genomic_sam (char *genomic_diff, char *query, int querystart, int quer
mark_mismatches_cmet_sam(result,query,querystart,queryend,genestrand);
} else if (mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
mark_mismatches_atoi_sam(result,query,querystart,queryend,genestrand);
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ mark_mismatches_ttoc_sam(result,query,querystart,queryend,genestrand);
} else {
abort();
}
diff --git a/src/types.h b/src/types.h
index cb02ba0..c7df5f2 100644
--- a/src/types.h
+++ b/src/types.h
@@ -1,4 +1,4 @@
-/* $Id: types.h 157223 2015-01-22 18:43:01Z twu $ */
+/* $Id: types.h 168395 2015-06-26 17:13:13Z twu $ */
#ifndef TYPES_INCLUDED
#define TYPES_INCLUDED
#ifdef HAVE_CONFIG_H
@@ -116,6 +116,13 @@ typedef Uintlist_T Univcoordlist_T;
#endif
+/* For univintervals and Univ_IIT (chromosome_iit) files. Use the largest word size allowable on the machine. */
+#ifdef HAVE_64_BIT
+typedef UINT8 Univ_IIT_coord_T;
+#else
+typedef UINT4 Univ_IIT_coord_T;
+#endif
+
/* For splicetrie */
typedef UINT4 Trieoffset_T;
typedef UINT4 Triecontent_T;
diff --git a/src/uniqscan.c b/src/uniqscan.c
index 79b0470..c2359ad 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 166641 2015-05-29 21:13:04Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 167592 2015-06-15 18:56:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -775,8 +775,12 @@ main (int argc, char *argv[]) {
mode = ATOI_STRANDED;
} else if (!strcmp(optarg,"atoi-nonstranded")) {
mode = ATOI_NONSTRANDED;
+ } else if (!strcmp(optarg,"ttoc-stranded")) {
+ mode = TTOC_STRANDED;
+ } else if (!strcmp(optarg,"ttoc-nonstranded")) {
+ mode = TTOC_NONSTRANDED;
} else {
- fprintf(stderr,"--mode must be standard, cmet-stranded, cmet-nonstranded, atoi-stranded, or atoi\n");
+ fprintf(stderr,"--mode must be standard, cmet-stranded, cmet-nonstranded, atoi-stranded, atoi-nonstranded, ttoc-stranded, or ttoc-nonstranded\n");
exit(9);
}
@@ -1063,6 +1067,30 @@ main (int argc, char *argv[]) {
exit(9);
}
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ if (user_atoidir == NULL) {
+ modedir = genomesubdir;
+ } else {
+ modedir = user_atoidir;
+ }
+
+ if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
+ required_index1part,required_interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
+ fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
+ exit(9);
+ }
+
+ if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
+ required_index1part,required_interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
+ fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
+ exit(9);
+ }
} else {
/* Standard behavior */
@@ -1140,6 +1168,30 @@ main (int argc, char *argv[]) {
exit(9);
}
+ } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
+ if (user_atoidir == NULL) {
+ modedir = snpsdir;
+ } else {
+ modedir = user_atoidir;
+ }
+
+ if ((indexdb = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
+ required_index1part,required_interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
+ fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
+ exit(9);
+ }
+ if ((indexdb2 = Indexdb_new_genome(&index1part,&index1interval,
+ modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
+ required_index1part,required_interval,
+ expand_offsets_p,offsetsstrm_access,positions_access,
+ /*sharedp*/false)) == NULL) {
+ fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
+ exit(9);
+ }
+
} else {
indexdb = Indexdb_new_genome(&index1part,&index1interval,
snpsdir,fileroot,/*idx_filesuffix*/"ref",snps_root,
diff --git a/src/univinterval.h b/src/univinterval.h
index a0c6877..406e989 100644
--- a/src/univinterval.h
+++ b/src/univinterval.h
@@ -1,4 +1,4 @@
-/* $Id: univinterval.h 157221 2015-01-22 18:38:57Z twu $ */
+/* $Id: univinterval.h 168395 2015-06-26 17:13:13Z twu $ */
#ifndef UNIVINTERVAL_INCLUDED
#define UNIVINTERVAL_INCLUDED
@@ -9,8 +9,8 @@
#define T Univinterval_T
typedef struct T *T;
struct T {
- Univcoord_T low; /* low <= high */
- Univcoord_T high;
+ Univ_IIT_coord_T low; /* low <= high */
+ Univ_IIT_coord_T high;
int sign;
int type;
};
diff --git a/tests/Makefile.in b/tests/Makefile.in
index 23a693e..5cb4615 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -49,6 +49,7 @@ am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/mmap-flags.m4 \
$(top_srcdir)/config/acx_mmap_fixed.m4 \
$(top_srcdir)/config/acx_mmap_variable.m4 \
+ $(top_srcdir)/config/shm-flags.m4 \
$(top_srcdir)/config/ax_mpi.m4 \
$(top_srcdir)/config/acx_pthread.m4 \
$(top_srcdir)/config/builtin-popcount.m4 \
diff --git a/util/Makefile.in b/util/Makefile.in
index 2a662a2..0f440f3 100644
--- a/util/Makefile.in
+++ b/util/Makefile.in
@@ -58,6 +58,7 @@ am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/mmap-flags.m4 \
$(top_srcdir)/config/acx_mmap_fixed.m4 \
$(top_srcdir)/config/acx_mmap_variable.m4 \
+ $(top_srcdir)/config/shm-flags.m4 \
$(top_srcdir)/config/ax_mpi.m4 \
$(top_srcdir)/config/acx_pthread.m4 \
$(top_srcdir)/config/builtin-popcount.m4 \
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list