[med-svn] [Git][med-team/htscodecs][upstream] New upstream version 1.5.1
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Sat Jul 22 19:14:22 BST 2023
Étienne Mollier pushed to branch upstream at Debian Med / htscodecs
Commits:
b828bcf2 by Étienne Mollier at 2023-07-22T20:08:50+02:00
New upstream version 1.5.1
- - - - -
19 changed files:
- .cirrus.yml
- NEWS.md
- configure.ac
- htscodecs/htscodecs.h
- htscodecs/rANS_static32x16pr.h
- htscodecs/rANS_static32x16pr_neon.c
- htscodecs/rANS_static4x16pr.c
- tests/arith_dynamic_fuzz.c
- tests/arith_dynamic_test.c
- tests/entropy.c
- tests/fqzcomp_qual_fuzz.c
- tests/fqzcomp_qual_test.c
- tests/rANS_static4x16pr_fuzz.c
- tests/rANS_static4x16pr_test.c
- tests/rANS_static_fuzz.c
- tests/rANS_static_test.c
- tests/tokenise_name3_fuzz.c
- tests/tokenise_name3_test.c
- tests/varint_test.c
Changes:
=====================================
.cirrus.yml
=====================================
@@ -18,7 +18,6 @@ compile_template: &COMPILE
# on_failure:
# - cat tests/test-suite.log
-
# ----------
# Linux
@@ -151,3 +150,22 @@ task:
# - c:\tools\msys64\usr\bin\bash --login "autoreconf -i"
# - c:\tools\msys64\usr\bin\bash "./configure"
# - c:\tools\msys64\usr\bin\bash "make -j4"
+
+# ----------
+# FreeBSD
+task:
+ name: freebsd
+ freebsd_instance:
+ image_family: freebsd-13-1
+
+ pkginstall_script:
+ - pkg update -f
+ - pkg install -y gcc autoconf automake libdeflate libtool
+
+ compile_script:
+ - autoreconf -i
+ - ./configure
+ - make -j4 CFLAGS="-g -O3 -Wall -Werror"
+
+ test_script:
+ - make check CFLAGS="-g -O3 -Wall -Werror"
=====================================
NEWS.md
=====================================
@@ -1,3 +1,29 @@
+Release 1.5.1: 19th July 2023
+-----------------------------
+
+This release is mainly small updates and bug fixes focusing on
+specific platforms, with no new features added.
+
+Changes
+
+- Be more selective in use of AVX512 on AMD Zen4 processors. This can
+ be faster (e.g. with 64-way unrolling), but in the current rANS codec
+ implementations AVX2 is faster for certain operations (PR#85).
+
+- Add config.h to test programs to help them pick up definitions such
+ as XOPEN_SOURCE (PR#84)
+
+- Add FreeBSD to CI testing (PR#83)
+
+Bug fixes
+
+- Trivial bug fix to the rans4x16pr test harness when given
+ incompressible data (PR#86).
+
+- Make ARM NEON checks specific to AArch64 and exclude AArch32 systems.
+ (PR#82 to fix issue#81, reported by Robert Clausecker)
+
+
Release 1.5.0: 14th April 2023
------------------------------
=====================================
configure.ac
=====================================
@@ -1,5 +1,5 @@
dnl Process this file with autoconf to produce a configure script.
-AC_INIT(htscodecs, 1.5.0)
+AC_INIT(htscodecs, 1.5.1)
# Some functions benefit from -O3 optimisation, so if the user didn't
# explicitly set any compiler flags, we'll plump for O3.
@@ -61,7 +61,7 @@ AM_EXTRA_RECURSIVE_TARGETS([fuzz])
# libhtscodecs.so.1.1.0
VERS_CURRENT=3
-VERS_REVISION=2
+VERS_REVISION=3
VERS_AGE=1
AC_SUBST(VERS_CURRENT)
AC_SUBST(VERS_REVISION)
=====================================
htscodecs/htscodecs.h
=====================================
@@ -43,7 +43,7 @@
* Note currently this needs manually editing as it isn't automatically
* updated by autoconf.
*/
-#define HTSCODECS_VERSION 100500
+#define HTSCODECS_VERSION 100501
/*
* A const string form of the HTSCODECS_VERSION define.
=====================================
htscodecs/rANS_static32x16pr.h
=====================================
@@ -146,7 +146,7 @@ unsigned char *rans_uncompress_O1_32x16_avx512(unsigned char *in,
//----------------------------------------------------------------------
// Arm Neon implementation
-#ifdef __ARM_NEON
+#if defined(__ARM_NEON) && defined(__aarch64__)
unsigned char *rans_compress_O0_32x16_neon(unsigned char *in,
unsigned int in_size,
unsigned char *out,
=====================================
htscodecs/rANS_static32x16pr_neon.c
=====================================
@@ -32,7 +32,7 @@
*/
#include "config.h"
-#ifdef __ARM_NEON
+#if defined(__ARM_NEON) && defined(__aarch64__)
#include <arm_neon.h>
#include <limits.h>
=====================================
htscodecs/rANS_static4x16pr.c
=====================================
@@ -57,6 +57,10 @@
#include <limits.h>
#include <math.h>
+#ifndef NO_THREADS
+#include <pthread.h>
+#endif
+
#include "rANS_word.h"
#include "rANS_static4x16.h"
#include "rANS_static16_int.h"
@@ -842,26 +846,28 @@ void rans_set_cpu(int opts) {
# define UNUSED
#endif
-static inline
-unsigned char *(*rans_enc_func(int do_simd, int order))
- (unsigned char *in,
- unsigned int in_size,
- unsigned char *out,
- unsigned int *out_size) {
- if (!do_simd) { // SIMD disabled
- return order & 1
- ? rans_compress_O1_4x16
- : rans_compress_O0_4x16;
- }
+// CPU detection is performed once. NB this has an assumption that we're
+// not migrating between processes with different instruction sets, but
+// to date the only systems I know of that support this don't have different
+// capabilities (that we use) per core.
+#ifndef NO_THREADS
+static pthread_once_t rans_cpu_once = PTHREAD_ONCE_INIT;
+#endif
+
+static int have_ssse3 UNUSED = 0;
+static int have_sse4_1 UNUSED = 0;
+static int have_popcnt UNUSED = 0;
+static int have_avx2 UNUSED = 0;
+static int have_avx512f UNUSED = 0;
+static int is_amd UNUSED = 0;
+
+static void htscodecs_tls_cpu_init(void) {
unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
// These may be unused, depending on HAVE_* config.h macros
- int have_ssse3 UNUSED = 0;
- int have_sse4_1 UNUSED = 0;
- int have_popcnt UNUSED = 0;
- int have_avx2 UNUSED = 0;
- int have_avx512f UNUSED = 0;
int level = __get_cpuid_max(0, NULL);
+ __cpuid_count(0, 0, eax, ebx, ecx, edx);
+ is_amd = (ecx == 0x444d4163);
if (level >= 1) {
__cpuid_count(1, 0, eax, ebx, ecx, edx);
#if defined(bit_SSSE3)
@@ -890,10 +896,34 @@ unsigned char *(*rans_enc_func(int do_simd, int order))
if (!(rans_cpu & RANS_CPU_ENC_AVX512)) have_avx512f = 0;
if (!(rans_cpu & RANS_CPU_ENC_AVX2)) have_avx2 = 0;
if (!(rans_cpu & RANS_CPU_ENC_SSE4)) have_sse4_1 = 0;
+}
+
+static inline
+unsigned char *(*rans_enc_func(int do_simd, int order))
+ (unsigned char *in,
+ unsigned int in_size,
+ unsigned char *out,
+ unsigned int *out_size) {
+ if (!do_simd) { // SIMD disabled
+ return order & 1
+ ? rans_compress_O1_4x16
+ : rans_compress_O0_4x16;
+ }
+
+#ifdef NO_THREADS
+ htscodecs_tls_cpu_init();
+#else
+ int err = pthread_once(&rans_cpu_once, htscodecs_tls_cpu_init);
+ if (err != 0) {
+ fprintf(stderr, "Initialising TLS data failed: pthread_once: %s\n",
+ strerror(err));
+ fprintf(stderr, "Using scalar code only\n");
+ }
+#endif
if (order & 1) {
#if defined(HAVE_AVX512)
- if (have_avx512f)
+ if (have_avx512f && (!is_amd || !have_avx2))
return rans_compress_O1_32x16_avx512;
#endif
#if defined(HAVE_AVX2)
@@ -907,7 +937,7 @@ unsigned char *(*rans_enc_func(int do_simd, int order))
return rans_compress_O1_32x16;
} else {
#if defined(HAVE_AVX512)
- if (have_avx512f)
+ if (have_avx512f && (!is_amd || !have_avx2))
return rans_compress_O0_32x16_avx512;
#endif
#if defined(HAVE_AVX2)
@@ -934,46 +964,17 @@ unsigned char *(*rans_dec_func(int do_simd, int order))
? rans_uncompress_O1_4x16
: rans_uncompress_O0_4x16;
}
- unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
- // These may be unused, depending on HAVE_* config.h macros
- int have_ssse3 UNUSED = 0;
- int have_sse4_1 UNUSED = 0;
- int have_popcnt UNUSED = 0;
- int have_avx2 UNUSED = 0;
- int have_avx512f UNUSED = 0;
- int level = __get_cpuid_max(0, NULL);
- if (level >= 1) {
- __cpuid_count(1, 0, eax, ebx, ecx, edx);
-#if defined(bit_SSSE3)
- have_ssse3 = ecx & bit_SSSE3;
-#endif
-#if defined(bit_POPCNT)
- have_popcnt = ecx & bit_POPCNT;
-#endif
-#if defined(bit_SSE4_1)
- have_sse4_1 = ecx & bit_SSE4_1;
-#endif
+#ifdef NO_THREADS
+ htscodecs_tls_cpu_init();
+#else
+ int err = pthread_once(&rans_cpu_once, htscodecs_tls_cpu_init);
+ if (err != 0) {
+ fprintf(stderr, "Initialising TLS data failed: pthread_once: %s\n",
+ strerror(err));
+ fprintf(stderr, "Using scalar code only\n");
}
- if (level >= 7) {
- __cpuid_count(7, 0, eax, ebx, ecx, edx);
-#if defined(bit_AVX2)
- have_avx2 = ebx & bit_AVX2;
-#endif
-#if defined(bit_AVX512F)
- have_avx512f = ebx & bit_AVX512F;
#endif
- }
-
- if (!have_popcnt) have_avx512f = have_avx2 = have_sse4_1 = 0;
- if (!have_ssse3) have_sse4_1 = 0;
-
- if (!(rans_cpu & RANS_CPU_DEC_AVX512)) have_avx512f = 0;
- if (!(rans_cpu & RANS_CPU_DEC_AVX2)) have_avx2 = 0;
- if (!(rans_cpu & RANS_CPU_DEC_SSE4)) have_sse4_1 = 0;
-
- // fprintf(stderr, "SSSE3 %d, SSE4.1 %d, POPCNT %d, AVX2 %d, AVX512F %d\n",
- // have_ssse3, have_sse4_1, have_popcnt, have_avx2, have_avx512f);
if (order & 1) {
#if defined(HAVE_AVX512)
@@ -991,7 +992,7 @@ unsigned char *(*rans_dec_func(int do_simd, int order))
return rans_uncompress_O1_32x16;
} else {
#if defined(HAVE_AVX512)
- if (have_avx512f)
+ if (have_avx512f && (!is_amd || !have_avx2))
return rans_uncompress_O0_32x16_avx512;
#endif
#if defined(HAVE_AVX2)
@@ -1006,7 +1007,7 @@ unsigned char *(*rans_dec_func(int do_simd, int order))
}
}
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && defined(__aarch64__)
#if defined(__linux__) || defined(__FreeBSD__)
#include <sys/auxv.h>
=====================================
tests/arith_dynamic_fuzz.c
=====================================
@@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
#include <stdint.h>
#include <stdlib.h>
=====================================
tests/arith_dynamic_test.c
=====================================
@@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
#include <stdint.h>
#include <stdlib.h>
=====================================
tests/entropy.c
=====================================
@@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
/*
* This test aims to test all entropy codecs on an input file.
=====================================
tests/fqzcomp_qual_fuzz.c
=====================================
@@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
#include <stdio.h>
#include <stdint.h>
=====================================
tests/fqzcomp_qual_test.c
=====================================
@@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
#include <stdio.h>
#include <stdint.h>
=====================================
tests/rANS_static4x16pr_fuzz.c
=====================================
@@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
/*
For best results, configure, from a build subdir, to use the address and
=====================================
tests/rANS_static4x16pr_test.c
=====================================
@@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
#include <stdint.h>
#include <stdlib.h>
@@ -49,9 +50,6 @@
# define BLK_SIZE 0x103810
#endif
-// Room to allow for expanded BLK_SIZE on worst case compression.
-#define BLK_SIZE2 ((105LL*BLK_SIZE)/100)
-
unsigned char *in_buf;
// Max 4GB
@@ -86,8 +84,7 @@ int main(int argc, char **argv) {
FILE *infp = stdin, *outfp = stdout;
struct timeval tv1, tv2, tv3, tv4;
size_t bytes = 0, raw = 0;
-
- in_buf = malloc(BLK_SIZE2+257*257*3);
+ uint32_t blk_size = BLK_SIZE;
#ifdef _WIN32
_setmode(_fileno(stdin), _O_BINARY);
@@ -100,7 +97,7 @@ int main(int argc, char **argv) {
extern void rans_disable_avx512(void);
extern void rans_disable_avx2(void);
- while ((opt = getopt(argc, argv, "o:dtrc:")) != -1) {
+ while ((opt = getopt(argc, argv, "o:dtrc:b:")) != -1) {
switch (opt) {
case 'o': {
char *optend;
@@ -126,9 +123,17 @@ int main(int argc, char **argv) {
case 'r':
raw = 1;
break;
+
+ case 'b':
+ blk_size = atoi(optarg);
+ break;
}
}
+ // Room to allow for expanded BLK_SIZE on worst case compression.
+ uint32_t blk_size2 = (105LL*blk_size)/100;
+ in_buf = malloc(blk_size2+257*257*3);
+
if (optind < argc) {
if (!(infp = fopen(argv[optind], "rb"))) {
perror(argv[optind]);
@@ -157,7 +162,6 @@ int main(int argc, char **argv) {
blocks *b = NULL, *bc = NULL, *bu = NULL;
int nb = 0, i;
- uint32_t blk_size = BLK_SIZE;
if (raw) {
b = malloc(sizeof(*b));
bu = malloc(sizeof(*bu));
@@ -287,7 +291,7 @@ int main(int argc, char **argv) {
if (4 != fread(&in_size, 1, 4, infp))
break;
- if (in_size > BLK_SIZE)
+ if (in_size > blk_size2)
exit(1);
if (in_size != fread(in_buf, 1, in_size, infp)) {
@@ -309,7 +313,7 @@ int main(int argc, char **argv) {
uint32_t in_size, out_size;
unsigned char *out;
- in_size = fread(in_buf, 1, BLK_SIZE, infp);
+ in_size = fread(in_buf, 1, blk_size, infp);
if (in_size <= 0)
break;
=====================================
tests/rANS_static_fuzz.c
=====================================
@@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
/*
For best results, configure, from a build subdir, to use the address and
=====================================
tests/rANS_static_test.c
=====================================
@@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
#include <stdint.h>
#include <stdlib.h>
=====================================
tests/tokenise_name3_fuzz.c
=====================================
@@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
#include <stdint.h>
#include <stdlib.h>
=====================================
tests/tokenise_name3_test.c
=====================================
@@ -30,6 +30,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
#include <stdio.h>
#include <stdlib.h>
=====================================
tests/varint_test.c
=====================================
@@ -31,6 +31,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
#include <stdlib.h>
#include <stdint.h>
View it on GitLab: https://salsa.debian.org/med-team/htscodecs/-/commit/b828bcf2fe6b9f928f2e1d99f73adc45d44da7ff
--
View it on GitLab: https://salsa.debian.org/med-team/htscodecs/-/commit/b828bcf2fe6b9f928f2e1d99f73adc45d44da7ff
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230722/eaca2121/attachment-0001.htm>
More information about the debian-med-commit mailing list