[DHG_packages] 01/01: cryptonite: replacement alignment patch

Clint Adams clint at moszumanska.debian.org
Sun Nov 6 20:18:35 UTC 2016


This is an automated email from the git hooks/post-receive script.

clint pushed a commit to branch master
in repository DHG_packages.

commit 4a9cbde6ea39b767bfcd7c3de7720a9743dc0e5e
Author: Clint Adams <clint at debian.org>
Date:   Sun Nov 6 15:17:35 2016 -0500

    cryptonite: replacement alignment patch
---
 p/haskell-cryptonite/debian/changelog              |   6 +
 .../debian/patches/crypto-buffer-alignment.patch   | 676 +++++++++++++++++++--
 2 files changed, 644 insertions(+), 38 deletions(-)

diff --git a/p/haskell-cryptonite/debian/changelog b/p/haskell-cryptonite/debian/changelog
index cb2200b..74c17db 100644
--- a/p/haskell-cryptonite/debian/changelog
+++ b/p/haskell-cryptonite/debian/changelog
@@ -1,3 +1,9 @@
+haskell-cryptonite (0.20-5) unstable; urgency=medium
+
+  * Patch from James Clarke to fix alignment on sparc64.
+
+ -- Clint Adams <clint at debian.org>  Sun, 06 Nov 2016 15:16:57 -0500
+
 haskell-cryptonite (0.20-4) unstable; urgency=medium
 
   * Upload to unstable as part of GHC 8 transition.
diff --git a/p/haskell-cryptonite/debian/patches/crypto-buffer-alignment.patch b/p/haskell-cryptonite/debian/patches/crypto-buffer-alignment.patch
index a5398ed..0f1b840 100644
--- a/p/haskell-cryptonite/debian/patches/crypto-buffer-alignment.patch
+++ b/p/haskell-cryptonite/debian/patches/crypto-buffer-alignment.patch
@@ -1,51 +1,651 @@
-Author: Steve Langasek <steve.langasek at ubuntu.com>
-Description: fix alignment of memory blocks used by SHA3
- SHA3 works in 64-bit chunks, but the incoming data pointer can be at any
- address.  Copy our data to an aligned address, to avoid SIGBUS on certain
- platforms.
- .
- This is not the only alignment issue in the code, but it is the one that
- manifests as SIGBUS on the most architectures.
+From c5a1df04ea6c560aea72e60dc1eb022cb3763d5f Mon Sep 17 00:00:00 2001
+From: James Clarke <jrtc27 at jrtc27.com>
+Date: Sun, 6 Nov 2016 19:49:59 +0000
+Subject: [PATCH] Fix unaligned memory accesses; fixes #108
 
-Index: haskell-cryptonite-0.20/cbits/cryptonite_sha3.c
-===================================================================
---- haskell-cryptonite-0.20.orig/cbits/cryptonite_sha3.c
-+++ haskell-cryptonite-0.20/cbits/cryptonite_sha3.c
-@@ -23,6 +23,7 @@
-  */
- 
- #include <stdint.h>
-+#include <stdlib.h>
- #include <string.h>
+---
+ cbits/cryptonite_bitfn.h    | 80 +++++++++++++++++++++++++++++++++++++++++++++
+ cbits/cryptonite_chacha.c   | 39 ++++++++++------------
+ cbits/cryptonite_md5.c      | 25 +++++++-------
+ cbits/cryptonite_poly1305.c | 33 ++++++++-----------
+ cbits/cryptonite_salsa.c    | 39 ++++++++++------------
+ cbits/cryptonite_scrypt.c   | 14 ++------
+ cbits/cryptonite_sha1.c     | 19 +++++------
+ cbits/cryptonite_sha256.c   | 14 ++++----
+ cbits/cryptonite_sha3.c     | 16 ++++-----
+ cbits/cryptonite_sha512.c   | 17 ++++++----
+ 10 files changed, 177 insertions(+), 119 deletions(-)
+
+diff --git a/cbits/cryptonite_bitfn.h b/cbits/cryptonite_bitfn.h
+index 3a00dd8..dfb5eb9 100644
+--- a/cbits/cryptonite_bitfn.h
++++ b/cbits/cryptonite_bitfn.h
+@@ -165,6 +165,86 @@ static inline void array_copy64(uint64_t *d, uint64_t *s, uint32_t nb)
+ }
+ #endif
+ 
++static inline uint32_t load_be32(const uint8_t *p)
++{
++	return ((uint32_t)p[0] << 24) |
++	       ((uint32_t)p[1] << 16) |
++	       ((uint32_t)p[2] <<  8) |
++	       ((uint32_t)p[3]      );
++}
++
++static inline uint64_t load_be64(const uint8_t *p)
++{
++	return ((uint64_t)p[0] << 56) |
++	       ((uint64_t)p[1] << 48) |
++	       ((uint64_t)p[2] << 40) |
++	       ((uint64_t)p[3] << 32) |
++	       ((uint64_t)p[4] << 24) |
++	       ((uint64_t)p[5] << 16) |
++	       ((uint64_t)p[6] <<  8) |
++	       ((uint64_t)p[7]      );
++}
++
++static inline void store_be32(uint8_t *p, uint32_t val)
++{
++	p[0] = (val >> 24);
++	p[1] = (val >> 16) & 0xFF;
++	p[2] = (val >>  8) & 0xFF;
++	p[3] = (val      ) & 0xFF;
++}
++
++static inline void store_be64(uint8_t *p, uint64_t val)
++{
++	p[0] = (val >> 56);
++	p[1] = (val >> 48) & 0xFF;
++	p[2] = (val >> 40) & 0xFF;
++	p[3] = (val >> 32) & 0xFF;
++	p[4] = (val >> 24) & 0xFF;
++	p[5] = (val >> 16) & 0xFF;
++	p[6] = (val >>  8) & 0xFF;
++	p[7] = (val      ) & 0xFF;
++}
++
++static inline uint32_t load_le32(const uint8_t *p)
++{
++	return ((uint32_t)p[0]      ) |
++	       ((uint32_t)p[1] <<  8) |
++	       ((uint32_t)p[2] << 16) |
++	       ((uint32_t)p[3] << 24);
++}
++
++static inline uint64_t load_le64(const uint8_t *p)
++{
++	return ((uint64_t)p[0]      ) |
++	       ((uint64_t)p[1] <<  8) |
++	       ((uint64_t)p[2] << 16) |
++	       ((uint64_t)p[3] << 24) |
++	       ((uint64_t)p[4] << 32) |
++	       ((uint64_t)p[5] << 40) |
++	       ((uint64_t)p[6] << 48) |
++	       ((uint64_t)p[7] << 56);
++}
++
++static inline void store_le32(uint8_t *p, uint32_t val)
++{
++	p[0] = (val      ) & 0xFF;
++	p[1] = (val >>  8) & 0xFF;
++	p[2] = (val >> 16) & 0xFF;
++	p[3] = (val >> 24);
++}
++
++static inline void store_le64(uint8_t *p, uint64_t val)
++{
++	p[0] = (val      ) & 0xFF;
++	p[1] = (val >>  8) & 0xFF;
++	p[2] = (val >> 16) & 0xFF;
++	p[3] = (val >> 24) & 0xFF;
++	p[4] = (val >> 32) & 0xFF;
++	p[5] = (val >> 40) & 0xFF;
++	p[6] = (val >> 48) & 0xFF;
++	p[7] = (val >> 56);
++}
++
+ #ifdef __GNUC__
+ #define bitfn_ntz(n) __builtin_ctz(n)
+ #else
+diff --git a/cbits/cryptonite_chacha.c b/cbits/cryptonite_chacha.c
+index 24e3ec0..4ee6f44 100644
+--- a/cbits/cryptonite_chacha.c
++++ b/cbits/cryptonite_chacha.c
+@@ -46,11 +46,6 @@
+ static const uint8_t sigma[16] = "expand 32-byte k";
+ static const uint8_t tau[16] = "expand 16-byte k";
+ 
+-static inline uint32_t load32(const uint8_t *p)
+-{
+-	return le32_to_cpu(*((uint32_t *) p));
+-}
+-
+ static void chacha_core(int rounds, block *out, const cryptonite_chacha_state *in)
+ {
+ 	uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+@@ -104,33 +99,33 @@ void cryptonite_chacha_init_core(cryptonite_chacha_state *st,
+ 	const uint8_t *constants = (keylen == 32) ? sigma : tau;
+ 	int i;
+ 
+-	st->d[0] = load32(constants + 0);
+-	st->d[1] = load32(constants + 4);
+-	st->d[2] = load32(constants + 8);
+-	st->d[3] = load32(constants + 12);
++	st->d[0] = load_le32(constants + 0);
++	st->d[1] = load_le32(constants + 4);
++	st->d[2] = load_le32(constants + 8);
++	st->d[3] = load_le32(constants + 12);
+ 
+-	st->d[4] = load32(key + 0);
+-	st->d[5] = load32(key + 4);
+-	st->d[6] = load32(key + 8);
+-	st->d[7] = load32(key + 12);
++	st->d[4] = load_le32(key + 0);
++	st->d[5] = load_le32(key + 4);
++	st->d[6] = load_le32(key + 8);
++	st->d[7] = load_le32(key + 12);
+ 	/* we repeat the key on 128 bits */
+ 	if (keylen == 32)
+ 		key += 16;
+-	st->d[8] = load32(key + 0);
+-	st->d[9] = load32(key + 4);
+-	st->d[10] = load32(key + 8);
+-	st->d[11] = load32(key + 12);
++	st->d[8] = load_le32(key + 0);
++	st->d[9] = load_le32(key + 4);
++	st->d[10] = load_le32(key + 8);
++	st->d[11] = load_le32(key + 12);
+ 	st->d[12] = 0;
+ 	switch (ivlen) {
+ 	case 8:
+ 		st->d[13] = 0;
+-		st->d[14] = load32(iv + 0);
+-		st->d[15] = load32(iv + 4);
++		st->d[14] = load_le32(iv + 0);
++		st->d[15] = load_le32(iv + 4);
+ 		break;
+ 	case 12:
+-		st->d[13] = load32(iv + 0);
+-		st->d[14] = load32(iv + 4);
+-		st->d[15] = load32(iv + 8);
++		st->d[13] = load_le32(iv + 0);
++		st->d[14] = load_le32(iv + 4);
++		st->d[15] = load_le32(iv + 8);
+ 	default:
+ 		return;
+ 	}
+diff --git a/cbits/cryptonite_md5.c b/cbits/cryptonite_md5.c
+index 4612a5e..3c4df08 100644
+--- a/cbits/cryptonite_md5.c
++++ b/cbits/cryptonite_md5.c
+@@ -44,15 +44,15 @@ void cryptonite_md5_init(struct md5_ctx *ctx)
+ #define f4(x, y, z)	(y ^ (x | ~z))
+ #define R(f, a, b, c, d, i, k, s) a += f(b, c, d) + w[i] + k; a = rol32(a, s); a += b
+ 
+-static void md5_do_chunk(struct md5_ctx *ctx, uint32_t *buf)
++static void md5_do_chunk(struct md5_ctx *ctx, const uint8_t *buf)
+ {
+ 	uint32_t a, b, c, d;
+-#ifdef ARCH_IS_BIG_ENDIAN
+ 	uint32_t w[16];
+-	cpu_to_le32_array(w, buf, 16);
+-#else
+-	uint32_t *w = buf;
+-#endif
++#define CPY(i)	w[i] = load_le32(buf+4*i)
++	CPY(0); CPY(1); CPY(2); CPY(3); CPY(4); CPY(5); CPY(6); CPY(7);
++	CPY(8); CPY(9); CPY(10); CPY(11); CPY(12); CPY(13); CPY(14); CPY(15);
++#undef CPY
++
+ 	a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
+ 
+ 	R(f1, a, b, c, d, 0, 0xd76aa478, 7);
+@@ -137,7 +137,7 @@ void cryptonite_md5_update(struct md5_ctx *ctx, const uint8_t *data, uint32_t le
+ 
+ 	if (index && len >= to_fill) {
+ 		memcpy(ctx->buf + index, data, to_fill);
+-		md5_do_chunk(ctx, (uint32_t *) ctx->buf);
++		md5_do_chunk(ctx, ctx->buf);
+ 		len -= to_fill;
+ 		data += to_fill;
+ 		index = 0;
+@@ -145,7 +145,7 @@ void cryptonite_md5_update(struct md5_ctx *ctx, const uint8_t *data, uint32_t le
+ 
+ 	/* process as much 64-block as possible */
+ 	for (; len >= 64; len -= 64, data += 64)
+-		md5_do_chunk(ctx, (uint32_t *) data);
++		md5_do_chunk(ctx, data);
+ 
+ 	/* append data into buf */
+ 	if (len)
+@@ -157,7 +157,6 @@ void cryptonite_md5_finalize(struct md5_ctx *ctx, uint8_t *out)
+ 	static uint8_t padding[64] = { 0x80, };
+ 	uint64_t bits;
+ 	uint32_t index, padlen;
+-	uint32_t *p = (uint32_t *) out;
+ 
+ 	/* add padding and update data with it */
+ 	bits = cpu_to_le64(ctx->sz << 3);
+@@ -171,8 +170,8 @@ void cryptonite_md5_finalize(struct md5_ctx *ctx, uint8_t *out)
+ 	cryptonite_md5_update(ctx, (uint8_t *) &bits, sizeof(bits));
+ 
+ 	/* output hash */
+-	p[0] = cpu_to_le32(ctx->h[0]);
+-	p[1] = cpu_to_le32(ctx->h[1]);
+-	p[2] = cpu_to_le32(ctx->h[2]);
+-	p[3] = cpu_to_le32(ctx->h[3]);
++	store_le32(out   , ctx->h[0]);
++	store_le32(out+ 4, ctx->h[1]);
++	store_le32(out+ 8, ctx->h[2]);
++	store_le32(out+12, ctx->h[3]);
+ }
+diff --git a/cbits/cryptonite_poly1305.c b/cbits/cryptonite_poly1305.c
+index 60dc5e0..9fa1fd1 100644
+--- a/cbits/cryptonite_poly1305.c
++++ b/cbits/cryptonite_poly1305.c
+@@ -38,11 +38,6 @@
+ #include "cryptonite_poly1305.h"
  #include "cryptonite_bitfn.h"
- #include "cryptonite_sha3.h"
-@@ -107,6 +108,7 @@ void cryptonite_sha3_init(struct sha3_ct
- void cryptonite_sha3_update(struct sha3_ctx *ctx, const uint8_t *data, uint32_t len)
+ 
+-static inline uint32_t load32(uint8_t *p)
+-{
+-	return (le32_to_cpu(*((uint32_t *) p)));
+-}
+-
+ static void poly1305_do_chunk(poly1305_ctx *ctx, uint8_t *data, int blocks, int final)
+ {
+ 	/* following is a cleanup copy of code available poly1305-donna */
+@@ -61,11 +56,11 @@ static void poly1305_do_chunk(poly1305_ctx *ctx, uint8_t *data, int blocks, int
+ 	s1 = r1 * 5; s2 = r2 * 5; s3 = r3 * 5; s4 = r4 * 5;
+ 
+ 	while (blocks--) {
+-		h0 += (load32(data+ 0)     ) & 0x3ffffff;
+-		h1 += (load32(data+ 3) >> 2) & 0x3ffffff;
+-		h2 += (load32(data+ 6) >> 4) & 0x3ffffff;
+-		h3 += (load32(data+ 9) >> 6) & 0x3ffffff;
+-		h4 += (load32(data+12) >> 8) | hibit;
++		h0 += (load_le32(data+ 0)     ) & 0x3ffffff;
++		h1 += (load_le32(data+ 3) >> 2) & 0x3ffffff;
++		h2 += (load_le32(data+ 6) >> 4) & 0x3ffffff;
++		h3 += (load_le32(data+ 9) >> 6) & 0x3ffffff;
++		h4 += (load_le32(data+12) >> 8) | hibit;
+ 
+ 		d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) + ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) + ((uint64_t)h4 * s1);
+ 		d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) + ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) + ((uint64_t)h4 * s2);
+@@ -94,16 +89,16 @@ void cryptonite_poly1305_init(poly1305_ctx *ctx, poly1305_key *key)
+ 
+ 	memset(ctx, 0, sizeof(poly1305_ctx));
+ 
+-	ctx->r[0] = (load32(&k[ 0])     ) & 0x3ffffff;
+-	ctx->r[1] = (load32(&k[ 3]) >> 2) & 0x3ffff03;
+-	ctx->r[2] = (load32(&k[ 6]) >> 4) & 0x3ffc0ff;
+-	ctx->r[3] = (load32(&k[ 9]) >> 6) & 0x3f03fff;
+-	ctx->r[4] = (load32(&k[12]) >> 8) & 0x00fffff;
++	ctx->r[0] = (load_le32(&k[ 0])     ) & 0x3ffffff;
++	ctx->r[1] = (load_le32(&k[ 3]) >> 2) & 0x3ffff03;
++	ctx->r[2] = (load_le32(&k[ 6]) >> 4) & 0x3ffc0ff;
++	ctx->r[3] = (load_le32(&k[ 9]) >> 6) & 0x3f03fff;
++	ctx->r[4] = (load_le32(&k[12]) >> 8) & 0x00fffff;
+ 
+-	ctx->pad[0] = load32(&k[16]);
+-	ctx->pad[1] = load32(&k[20]);
+-	ctx->pad[2] = load32(&k[24]);
+-	ctx->pad[3] = load32(&k[28]);
++	ctx->pad[0] = load_le32(&k[16]);
++	ctx->pad[1] = load_le32(&k[20]);
++	ctx->pad[2] = load_le32(&k[24]);
++	ctx->pad[3] = load_le32(&k[28]);
+ 
+ 	ctx->index = 0;
+ }
+diff --git a/cbits/cryptonite_salsa.c b/cbits/cryptonite_salsa.c
+index 0bd9660..38fc396 100644
+--- a/cbits/cryptonite_salsa.c
++++ b/cbits/cryptonite_salsa.c
+@@ -58,11 +58,6 @@ static const uint8_t tau[16] = "expand 16-byte k";
+ 		QR (x15,x12,x13,x14); \
+ 	}
+ 
+-static inline uint32_t load32(const uint8_t *p)
+-{
+-	return le32_to_cpu(*((uint32_t *) p));
+-}
+-
+ static void salsa_core(int rounds, block *out, const cryptonite_salsa_state *in)
+ {
+ 	uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+@@ -126,34 +121,34 @@ void cryptonite_salsa_init_core(cryptonite_salsa_state *st,
+ 	const uint8_t *constants = (keylen == 32) ? sigma : tau;
+ 	int i;
+ 
+-	st->d[0] = load32(constants + 0);
+-	st->d[5] = load32(constants + 4);
+-	st->d[10] = load32(constants + 8);
+-	st->d[15] = load32(constants + 12);
++	st->d[0] = load_le32(constants + 0);
++	st->d[5] = load_le32(constants + 4);
++	st->d[10] = load_le32(constants + 8);
++	st->d[15] = load_le32(constants + 12);
+ 
+-	st->d[1] = load32(key + 0);
+-	st->d[2] = load32(key + 4);
+-	st->d[3] = load32(key + 8);
+-	st->d[4] = load32(key + 12);
++	st->d[1] = load_le32(key + 0);
++	st->d[2] = load_le32(key + 4);
++	st->d[3] = load_le32(key + 8);
++	st->d[4] = load_le32(key + 12);
+ 	/* we repeat the key on 128 bits */
+ 	if (keylen == 32)
+ 		key += 16;
+-	st->d[11] = load32(key + 0);
+-	st->d[12] = load32(key + 4);
+-	st->d[13] = load32(key + 8);
+-	st->d[14] = load32(key + 12);
++	st->d[11] = load_le32(key + 0);
++	st->d[12] = load_le32(key + 4);
++	st->d[13] = load_le32(key + 8);
++	st->d[14] = load_le32(key + 12);
+ 
+ 	st->d[9] = 0;
+ 	switch (ivlen) {
+ 	case 8:
+-		st->d[6] = load32(iv + 0);
+-		st->d[7] = load32(iv + 4);
++		st->d[6] = load_le32(iv + 0);
++		st->d[7] = load_le32(iv + 4);
+ 		st->d[8] = 0;
+ 		break;
+ 	case 12:
+-		st->d[6] = load32(iv + 0);
+-		st->d[7] = load32(iv + 4);
+-		st->d[8] = load32(iv + 8);
++		st->d[6] = load_le32(iv + 0);
++		st->d[7] = load_le32(iv + 4);
++		st->d[8] = load_le32(iv + 8);
+ 	default:
+ 		return;
+ 	}
+diff --git a/cbits/cryptonite_scrypt.c b/cbits/cryptonite_scrypt.c
+index eaf28ac..bc6c137 100644
+--- a/cbits/cryptonite_scrypt.c
++++ b/cbits/cryptonite_scrypt.c
+@@ -49,16 +49,6 @@ static inline uint64_t integerify(uint32_t *B, const uint32_t r)
+ 	return B[(2*r-1) * 16] | (uint64_t)B[(2*r-1) * 16 + 1] << 32;
+ }
+ 
+-static inline uint32_t load32(const uint8_t *p)
+-{
+-	return le32_to_cpu(*((uint32_t *) p));
+-}
+-
+-static inline void store32(const uint8_t *p, uint32_t val)
+-{
+-	*((uint32_t *) p) = cpu_to_le32(val);
+-}
+-
+ void cryptonite_scrypt_smix(uint8_t *B, const uint32_t r, const uint64_t N, uint32_t *V, uint32_t *XY)
+ {
+ 	uint32_t *X = XY;
+@@ -69,7 +59,7 @@ void cryptonite_scrypt_smix(uint8_t *B, const uint32_t r, const uint64_t N, uint
+ 	const int r32 = 32*r;
+ 
+ 	for (k = 0; k < r32; k++)
+-		X[k] = load32(&B[4 * k]);
++		X[k] = load_le32(&B[4 * k]);
+ 	for (i = 0; i < N; i += 2) {
+ 		array_copy32(&V[i * r32], X, r32);
+ 		blockmix_salsa8(X, Y, Z, r);
+@@ -86,5 +76,5 @@ void cryptonite_scrypt_smix(uint8_t *B, const uint32_t r, const uint64_t N, uint
+ 		blockmix_salsa8(Y, X, Z, r);
+ 	}
+ 	for (k = 0; k < r32; k++)
+-		store32(&B[4*k], X[k]);
++		store_le32(&B[4*k], X[k]);
+ }
+diff --git a/cbits/cryptonite_sha1.c b/cbits/cryptonite_sha1.c
+index 33b6fa0..c508903 100644
+--- a/cbits/cryptonite_sha1.c
++++ b/cbits/cryptonite_sha1.c
+@@ -53,11 +53,11 @@ void cryptonite_sha1_init(struct sha1_ctx *ctx)
+ #define M(i)  (w[i & 0x0f] = rol32(w[i & 0x0f] ^ w[(i - 14) & 0x0f] \
+               ^ w[(i - 8) & 0x0f] ^ w[(i - 3) & 0x0f], 1))
+ 
+-static inline void sha1_do_chunk(struct sha1_ctx *ctx, uint32_t *buf)
++static inline void sha1_do_chunk(struct sha1_ctx *ctx, const uint8_t *buf)
+ {
+ 	uint32_t a, b, c, d, e;
+ 	uint32_t w[16];
+-#define CPY(i)	w[i] = be32_to_cpu(buf[i])
++#define CPY(i)	w[i] = load_be32(buf+4*i)
+ 	CPY(0); CPY(1); CPY(2); CPY(3); CPY(4); CPY(5); CPY(6); CPY(7);
+ 	CPY(8); CPY(9); CPY(10); CPY(11); CPY(12); CPY(13); CPY(14); CPY(15);
+ #undef CPY
+@@ -167,7 +167,7 @@ void cryptonite_sha1_update(struct sha1_ctx *ctx, const uint8_t *data, uint32_t
+ 	/* process partial buffer if there's enough data to make a block */
+ 	if (index && len >= to_fill) {
+ 		memcpy(ctx->buf + index, data, to_fill);
+-		sha1_do_chunk(ctx, (uint32_t *) ctx->buf);
++		sha1_do_chunk(ctx, ctx->buf);
+ 		len -= to_fill;
+ 		data += to_fill;
+ 		index = 0;
+@@ -175,7 +175,7 @@ void cryptonite_sha1_update(struct sha1_ctx *ctx, const uint8_t *data, uint32_t
+ 
+ 	/* process as much 64-block as possible */
+ 	for (; len >= 64; len -= 64, data += 64)
+-		sha1_do_chunk(ctx, (uint32_t *) data);
++		sha1_do_chunk(ctx, data);
+ 
+ 	/* append data into buf */
+ 	if (len)
+@@ -187,7 +187,6 @@ void cryptonite_sha1_finalize(struct sha1_ctx *ctx, uint8_t *out)
+ 	static uint8_t padding[64] = { 0x80, };
+ 	uint64_t bits;
+ 	uint32_t index, padlen;
+-	uint32_t *p = (uint32_t *) out;
+ 
+ 	/* add padding and update data with it */
+ 	bits = cpu_to_be64(ctx->sz << 3);
+@@ -201,9 +200,9 @@ void cryptonite_sha1_finalize(struct sha1_ctx *ctx, uint8_t *out)
+ 	cryptonite_sha1_update(ctx, (uint8_t *) &bits, sizeof(bits));
+ 
+ 	/* output hash */
+-	p[0] = cpu_to_be32(ctx->h[0]);
+-	p[1] = cpu_to_be32(ctx->h[1]);
+-	p[2] = cpu_to_be32(ctx->h[2]);
+-	p[3] = cpu_to_be32(ctx->h[3]);
+-	p[4] = cpu_to_be32(ctx->h[4]);
++	store_be32(out   , ctx->h[0]);
++	store_be32(out+ 4, ctx->h[1]);
++	store_be32(out+ 8, ctx->h[2]);
++	store_be32(out+12, ctx->h[3]);
++	store_be32(out+16, ctx->h[4]);
+ }
+diff --git a/cbits/cryptonite_sha256.c b/cbits/cryptonite_sha256.c
+index b93d815..7df46e0 100644
+--- a/cbits/cryptonite_sha256.c
++++ b/cbits/cryptonite_sha256.c
+@@ -74,13 +74,16 @@ static const uint32_t k[] = {
+ #define s0(x)       (ror32(x, 7) ^ ror32(x,18) ^ (x >> 3))
+ #define s1(x)       (ror32(x,17) ^ ror32(x,19) ^ (x >> 10))
+ 
+-static void sha256_do_chunk(struct sha256_ctx *ctx, uint32_t buf[])
++static void sha256_do_chunk(struct sha256_ctx *ctx, const uint8_t *buf)
+ {
+ 	uint32_t a, b, c, d, e, f, g, h, t1, t2;
+ 	int i;
+ 	uint32_t w[64];
+ 
+-	cpu_to_be32_array(w, buf, 16);
++#define CPY(i)	w[i] = load_be32(buf+4*i)
++	CPY(0); CPY(1); CPY(2); CPY(3); CPY(4); CPY(5); CPY(6); CPY(7);
++	CPY(8); CPY(9); CPY(10); CPY(11); CPY(12); CPY(13); CPY(14); CPY(15);
++#undef CPY
+ 	for (i = 16; i < 64; i++)
+ 		w[i] = s1(w[i - 2]) + w[i - 7] + s0(w[i - 15]) + w[i - 16];
+ 
+@@ -128,7 +131,7 @@ void cryptonite_sha256_update(struct sha256_ctx *ctx, const uint8_t *data, uint3
+ 	/* process partial buffer if there's enough data to make a block */
+ 	if (index && len >= to_fill) {
+ 		memcpy(ctx->buf + index, data, to_fill);
+-		sha256_do_chunk(ctx, (uint32_t *) ctx->buf);
++		sha256_do_chunk(ctx, ctx->buf);
+ 		len -= to_fill;
+ 		data += to_fill;
+ 		index = 0;
+@@ -136,7 +139,7 @@ void cryptonite_sha256_update(struct sha256_ctx *ctx, const uint8_t *data, uint3
+ 
+ 	/* process as much 64-block as possible */
+ 	for (; len >= 64; len -= 64, data += 64)
+-		sha256_do_chunk(ctx, (uint32_t *) data);
++		sha256_do_chunk(ctx, data);
+ 
+ 	/* append data into buf */
+ 	if (len)
+@@ -156,7 +159,6 @@ void cryptonite_sha256_finalize(struct sha256_ctx *ctx, uint8_t *out)
+ 	static uint8_t padding[64] = { 0x80, };
+ 	uint64_t bits;
+ 	uint32_t i, index, padlen;
+-	uint32_t *p = (uint32_t *) out;
+ 
+ 	/* cpu -> big endian */
+ 	bits = cpu_to_be64(ctx->sz << 3);
+@@ -171,5 +173,5 @@ void cryptonite_sha256_finalize(struct sha256_ctx *ctx, uint8_t *out)
+ 
+ 	/* store to digest */
+ 	for (i = 0; i < 8; i++)
+-		p[i] = cpu_to_be32(ctx->h[i]);
++		store_be32(out+4*i, ctx->h[i]);
+ }
+diff --git a/cbits/cryptonite_sha3.c b/cbits/cryptonite_sha3.c
+index 5605421..befc783 100644
+--- a/cbits/cryptonite_sha3.c
++++ b/cbits/cryptonite_sha3.c
+@@ -49,14 +49,14 @@ static const int keccak_rotc[24] =
+ static const int keccak_piln[24] =
+ 	{ 10,7,11,17,18,3,5,16,8,21,24,4,15,23,19,13,12,2,20,14,22,9,6,1 };
+ 
+-static inline void sha3_do_chunk(uint64_t state[25], uint64_t buf[], int bufsz)
++static inline void sha3_do_chunk(uint64_t state[25], const uint8_t *buf, int bufsz)
  {
- 	uint32_t to_fill;
-+	uint64_t *data_aligned = NULL;
+ 	int i, j, r;
+ 	uint64_t tmp, bc[5];
  
+ 	/* merge buf with state */
+-	for (i = 0; i < bufsz; i++)
+-		state[i] ^= le64_to_cpu(buf[i]);
++	for (i = 0; i < bufsz / 8; i++)
++		state[i] ^= load_le64(buf+8*i);
+ 
+ 	/* run keccak rounds */
+ 	for (r = 0; r < KECCAK_NB_ROUNDS; r++) {
+@@ -111,14 +111,14 @@ void cryptonite_sha3_update(struct sha3_ctx *ctx, const uint8_t *data, uint32_t
  	to_fill = ctx->bufsz - ctx->bufindex;
  
-@@ -124,6 +126,13 @@ void cryptonite_sha3_update(struct sha3_
+ 	if (ctx->bufindex == ctx->bufsz) {
+-		sha3_do_chunk(ctx->state, (uint64_t *) ctx->buf, ctx->bufsz / 8);
++		sha3_do_chunk(ctx->state, ctx->buf, ctx->bufsz);
  		ctx->bufindex = 0;
  	}
  
-+	/* fix up alignment if necessary */
-+	if (len && (unsigned long) data & 7) {
-+		data_aligned = malloc(len);
-+		memcpy(data_aligned, data, len);
-+		data = (uint8_t *) data_aligned;
-+	}
-+
+ 	/* process partial buffer if there's enough data to make a block */
+ 	if (ctx->bufindex && len >= to_fill) {
+ 		memcpy(ctx->buf + ctx->bufindex, data, to_fill);
+-		sha3_do_chunk(ctx->state, (uint64_t *) ctx->buf, ctx->bufsz / 8);
++		sha3_do_chunk(ctx->state, ctx->buf, ctx->bufsz);
+ 		len -= to_fill;
+ 		data += to_fill;
+ 		ctx->bufindex = 0;
+@@ -126,7 +126,7 @@ void cryptonite_sha3_update(struct sha3_ctx *ctx, const uint8_t *data, uint32_t
+ 
  	/* process as much ctx->bufsz-block */
  	for (; len >= ctx->bufsz; len -= ctx->bufsz, data += ctx->bufsz)
- 		sha3_do_chunk(ctx->state, (uint64_t *) data, ctx->bufsz / 8);
-@@ -133,6 +142,7 @@ void cryptonite_sha3_update(struct sha3_
- 		memcpy(ctx->buf + ctx->bufindex, data, len);
- 		ctx->bufindex += len;
+-		sha3_do_chunk(ctx->state, (uint64_t *) data, ctx->bufsz / 8);
++		sha3_do_chunk(ctx->state, data, ctx->bufsz);
+ 
+ 	/* append data into buf */
+ 	if (len) {
+@@ -141,7 +141,7 @@ void cryptonite_sha3_finalize(struct sha3_ctx *ctx, uint32_t hashlen, uint8_t *o
+ 
+ 	/* process full buffer if needed */
+ 	if (ctx->bufindex == ctx->bufsz) {
+-		sha3_do_chunk(ctx->state, (uint64_t *) ctx->buf, ctx->bufsz / 8);
++		sha3_do_chunk(ctx->state, ctx->buf, ctx->bufsz);
+ 		ctx->bufindex = 0;
  	}
-+	free(data_aligned);
- }
  
- void cryptonite_sha3_finalize(struct sha3_ctx *ctx, uint32_t hashlen, uint8_t *out)
+@@ -151,7 +151,7 @@ void cryptonite_sha3_finalize(struct sha3_ctx *ctx, uint32_t hashlen, uint8_t *o
+ 	ctx->buf[ctx->bufsz - 1] |= 0x80;
+ 
+ 	/* process */
+-	sha3_do_chunk(ctx->state, (uint64_t *) ctx->buf, ctx->bufsz / 8);
++	sha3_do_chunk(ctx->state, ctx->buf, ctx->bufsz);
+ 
+ 	/* output */
+ 	cpu_to_le64_array(w, ctx->state, 25);
+diff --git a/cbits/cryptonite_sha512.c b/cbits/cryptonite_sha512.c
+index 75ec791..dca76c6 100644
+--- a/cbits/cryptonite_sha512.c
++++ b/cbits/cryptonite_sha512.c
+@@ -90,13 +90,16 @@ static const uint64_t k[] = {
+ #define s0(x)       (ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7))
+ #define s1(x)       (ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6))
+ 
+-static void sha512_do_chunk(struct sha512_ctx *ctx, uint64_t *buf)
++static void sha512_do_chunk(struct sha512_ctx *ctx, const uint8_t *buf)
+ {
+ 	uint64_t a, b, c, d, e, f, g, h, t1, t2;
+ 	int i;
+ 	uint64_t w[80];
+ 
+-	cpu_to_be64_array(w, buf, 16);
++#define CPY(i)	w[i] = load_be64(buf+8*i)
++	CPY(0); CPY(1); CPY(2); CPY(3); CPY(4); CPY(5); CPY(6); CPY(7);
++	CPY(8); CPY(9); CPY(10); CPY(11); CPY(12); CPY(13); CPY(14); CPY(15);
++#undef CPY
+ 
+ 	for (i = 16; i < 80; i++)
+ 		w[i] = s1(w[i - 2]) + w[i - 7] + s0(w[i - 15]) + w[i - 16];
+@@ -147,7 +150,7 @@ void cryptonite_sha512_update(struct sha512_ctx *ctx, const uint8_t *data, uint3
+ 	/* process partial buffer if there's enough data to make a block */
+ 	if (index && len >= to_fill) {
+ 		memcpy(ctx->buf + index, data, to_fill);
+-		sha512_do_chunk(ctx, (uint64_t *) ctx->buf);
++		sha512_do_chunk(ctx, ctx->buf);
+ 		len -= to_fill;
+ 		data += to_fill;
+ 		index = 0;
+@@ -155,7 +158,7 @@ void cryptonite_sha512_update(struct sha512_ctx *ctx, const uint8_t *data, uint3
+ 
+ 	/* process as much 128-block as possible */
+ 	for (; len >= 128; len -= 128, data += 128)
+-		sha512_do_chunk(ctx, (uint64_t *) data);
++		sha512_do_chunk(ctx, data);
+ 
+ 	/* append data into buf */
+ 	if (len)
+@@ -224,7 +227,7 @@ void cryptonite_sha512t_init(struct sha512_ctx *ctx, uint32_t hashlen)
+ 		break;
+ 	default: {
+ 		char buf[8+4];
+-		uint8_t out[64];
++		uint64_t out[8];
+ 		int i;
+ 
+ 		cryptonite_sha512_init(ctx);
+@@ -233,12 +236,12 @@ void cryptonite_sha512t_init(struct sha512_ctx *ctx, uint32_t hashlen)
+ 
+ 		i = sprintf(buf, "SHA-512/%d", hashlen);
+ 		cryptonite_sha512_update(ctx, (uint8_t *) buf, i);
+-		cryptonite_sha512_finalize(ctx, out);
++		cryptonite_sha512_finalize(ctx, (uint8_t *) out);
+ 
+ 		/* re-init the context, otherwise len is changed */
+ 		memset(ctx, 0, sizeof(*ctx));
+ 		for (i = 0; i < 8; i++)
+-			ctx->h[i] = cpu_to_be64(((uint64_t *) out)[i]);
++			ctx->h[i] = cpu_to_be64(out[i]);
+ 		}
+ 	}
+ }
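
The crux of the change is visible in the helpers added to cryptonite_bitfn.h above: the old code cast arbitrary byte pointers to uint32_t */uint64_t * and dereferenced them, which is undefined behaviour in C and raises SIGBUS on strict-alignment architectures such as sparc64 whenever the pointer is not suitably aligned. The replacement reassembles each word one byte at a time, which is well-defined at any address. A minimal standalone sketch of the idea follows (load_le32_demo and the sample buffer are illustrative, not part of the patch):

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* Alignment-safe little-endian 32-bit load, shaped like the
 * load_le32 helper the patch adds to cryptonite_bitfn.h. */
static inline uint32_t load_le32_demo(const uint8_t *p)
{
	return ((uint32_t)p[0]      ) |
	       ((uint32_t)p[1] <<  8) |
	       ((uint32_t)p[2] << 16) |
	       ((uint32_t)p[3] << 24);
}

int main(void)
{
	uint8_t buf[8] = { 0x78, 0x56, 0x34, 0x12, 0xf0, 0xde, 0xbc, 0x9a };

	/* buf + 1 is an odd address, so the old pattern
	 * le32_to_cpu(*(uint32_t *)(buf + 1)) could trap with
	 * SIGBUS on sparc64; the byte-wise load is fine anywhere. */
	printf("%08" PRIx32 "\n", load_le32_demo(buf + 1));	/* prints f0123456 */
	return 0;
}

On architectures that tolerate unaligned loads this costs little or nothing in practice: modern compilers usually recognize the byte-assembly pattern and emit a single load instruction.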

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-haskell/DHG_packages.git


