[pkg-remote-commits] [xrdp] 01/06: bring back assembly patches
Thorsten Glaser
tg@moszumanska.debian.org
Sat Mar 25 21:50:00 UTC 2017
This is an automated email from the git hooks/post-receive script.
tg pushed a commit to branch experimental
in repository xrdp.
commit a74aad49df254e01260855be002558ca8f7d66f8
Author: mirabilos <thorsten.glaser@teckids.org>
Date: Sat Mar 25 21:53:49 2017 +0100
bring back assembly patches
git diff to the respective upstream/devel branch as of
- librfxcodec ce09addf016d6186b798752d7bdddf6957e4615f
- xorgxrdp 12d154fec31d4907b82d47b77c6df3f684dbfc06
---
debian/patches/asm-librfxcodec.diff | 2071 +++++++++++++++++++++++++++++++++++
debian/patches/asm-xorgxrdp.diff | 1622 +++++++++++++++++++++++++++
debian/patches/series | 2 +
3 files changed, 3695 insertions(+)
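
The interesting piece of the patch below is the new macro layer added to librfxcodec/src/common.asm: PREPARE_RODATA lays out the constant tables, and lsym() (together with RETRIEVE_RODATA on i386 PIC) replaces the amd64-only [rel sym] addressing, so the same assembly source builds for amd64, i386 non-PIC and i386 PIC. As a rough illustration only, not part of the patch (the constant c42 and the procedure example_proc are made up), an i386 PIC user of these macros, assembled with -DASM_ARCH_I386 -DPIC, would look roughly like this:

%include "common.asm"

PREPARE_RODATA               ; i386 PIC: emits a tiny ..@get_caller_address helper
                             ; into .text and marks ..@rodata_begin, 16-byte aligned
c42 times 8 dw 42            ; sample 16-byte constant, addressed via lsym()

PROC example_proc
    push ebx                 ; ebx is callee-saved and used as the base register
    RETRIEVE_RODATA          ; computes the runtime address of ..@rodata_begin
    movdqa xmm0, [lsym(c42)] ; expands to [ebx + c42 - ..@rodata_begin]
    pop ebx
    ret

With -DASM_ARCH_AMD64 instead, lsym(c42) simply becomes "rel c42" (RIP-relative) and RETRIEVE_RODATA is intentionally left undefined, which is why the amd64 files in the patch never call it.
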
diff --git a/debian/patches/asm-librfxcodec.diff b/debian/patches/asm-librfxcodec.diff
new file mode 100644
index 0000000..9e1b13e
--- /dev/null
+++ b/debian/patches/asm-librfxcodec.diff
@@ -0,0 +1,2071 @@
+--- a/librfxcodec/src/amd64/Makefile.am
++++ b/librfxcodec/src/amd64/Makefile.am
+@@ -1,3 +1,5 @@
++NAFLAGS += -DASM_ARCH_AMD64
++
+ AMD64_ASM = \
+ cpuid_amd64.asm \
+ rfxcodec_encode_dwt_shift_amd64_sse2.asm \
+--- a/librfxcodec/src/amd64/cpuid_amd64.asm
++++ b/librfxcodec/src/amd64/cpuid_amd64.asm
+@@ -1,7 +1,5 @@
+ %include "common.asm"
+
+-section .text
+-
+ ;The first six integer or pointer arguments are passed in registers
+ ;RDI, RSI, RDX, RCX, R8, and R9
+
+@@ -32,5 +30,3 @@ PROC cpuid_amd64
+ ; restore registers
+ pop rbx
+ ret
+- align 16
+-
+--- a/librfxcodec/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm
++++ b/librfxcodec/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse2.asm
+@@ -21,8 +21,7 @@
+
+ %include "common.asm"
+
+-section .data
+- align 16
++PREPARE_RODATA
+ cw128 times 8 dw 128
+ cdFFFF times 4 dd 65535
+ ; these are 1 << (factor - 1) 0 to 15 is factor
+@@ -43,8 +42,6 @@ section .data
+ cwa8192 times 8 dw 8192 ; 14
+ cwa16384 times 8 dw 16384 ; 15
+
+-section .text
+-
+ ;******************************************************************************
+ ; source 16 bit signed, 16 pixel width
+ rfx_dwt_2d_encode_block_horiz_16_16:
+@@ -55,8 +52,8 @@ loop1a:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -66,8 +63,8 @@ loop1a:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -85,8 +82,8 @@ loop1a:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -247,8 +244,8 @@ loop1c:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -258,8 +255,8 @@ loop1c:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -277,8 +274,8 @@ loop1c:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -328,8 +325,8 @@ loop1c:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -339,8 +336,8 @@ loop1c:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -358,8 +355,8 @@ loop1c:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -423,8 +420,8 @@ loop1c1:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -434,8 +431,8 @@ loop1c1:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -453,8 +450,8 @@ loop1c1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -501,8 +498,8 @@ loop1c1:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -512,8 +509,8 @@ loop1c1:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -531,8 +528,8 @@ loop1c1:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -690,8 +687,8 @@ loop1e:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -701,8 +698,8 @@ loop1e:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -720,8 +717,8 @@ loop1e:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -774,8 +771,8 @@ loop2e:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -785,8 +782,8 @@ loop2e:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -804,8 +801,8 @@ loop2e:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -857,8 +854,8 @@ loop2e:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -868,8 +865,8 @@ loop2e:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -887,8 +884,8 @@ loop2e:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -952,8 +949,8 @@ loop1e1:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -963,8 +960,8 @@ loop1e1:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -982,8 +979,8 @@ loop1e1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -1033,8 +1030,8 @@ loop2e1:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -1044,8 +1041,8 @@ loop2e1:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -1063,8 +1060,8 @@ loop2e1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -1113,8 +1110,8 @@ loop2e1:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -1124,8 +1121,8 @@ loop2e1:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -1143,8 +1140,8 @@ loop2e1:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -1207,9 +1204,9 @@ loop1f:
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+- psubw xmm1, [rel cw128]
+- psubw xmm2, [rel cw128]
+- psubw xmm3, [rel cw128]
++ psubw xmm1, [lsym(cw128)]
++ psubw xmm2, [lsym(cw128)]
++ psubw xmm3, [lsym(cw128)]
+ psllw xmm1, 5
+ psllw xmm2, 5
+ psllw xmm3, 5
+@@ -1241,8 +1238,8 @@ loop2f:
+ movq xmm3, [rsi + 64 * 1 * 2] ; src[2n + 2]
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+- psubw xmm2, [rel cw128]
+- psubw xmm3, [rel cw128]
++ psubw xmm2, [lsym(cw128)]
++ psubw xmm3, [lsym(cw128)]
+ psllw xmm2, 5
+ psllw xmm3, 5
+ movdqa xmm4, xmm1
+@@ -1274,7 +1271,7 @@ loop2f:
+ movdqa xmm1, xmm3 ; src[2n]
+ movq xmm2, [rsi + 64 * 1] ; src[2n + 1]
+ punpcklbw xmm2, xmm0
+- psubw xmm2, [rel cw128]
++ psubw xmm2, [lsym(cw128)]
+ psllw xmm2, 5
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -1314,7 +1311,7 @@ set_quants_hi:
+ sub rax, 6 - 5
+ movd xmm9, eax
+ imul rax, 16
+- lea rdx, [rel cwa0]
++ lea rdx, [lsym(cwa0)]
+ add rdx, rax
+ movdqa xmm8, [rdx]
+ ret
+@@ -1323,7 +1320,7 @@ set_quants_lo:
+ sub rax, 6 - 5
+ movd xmm11, eax
+ imul rax, 16
+- lea rdx, [rel cwa0]
++ lea rdx, [lsym(cwa0)]
+ add rdx, rax
+ movdqa xmm10, [rdx]
+ ret
+@@ -1487,5 +1484,3 @@ PROC rfxcodec_encode_dwt_shift_amd64_sse
+ pop rdx
+ pop rbx
+ ret
+- align 16
+-
+--- a/librfxcodec/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm
++++ b/librfxcodec/src/amd64/rfxcodec_encode_dwt_shift_amd64_sse41.asm
+@@ -21,8 +21,7 @@
+
+ %include "common.asm"
+
+-section .data
+- align 16
++PREPARE_RODATA
+ cw128 times 8 dw 128
+ cdFFFF times 4 dd 65535
+ ; these are 1 << (factor - 1) 0 to 15 is factor
+@@ -43,8 +42,6 @@ section .data
+ cwa8192 times 8 dw 8192 ; 14
+ cwa16384 times 8 dw 16384 ; 15
+
+-section .text
+-
+ ;******************************************************************************
+ ; source 16 bit signed, 16 pixel width
+ rfx_dwt_2d_encode_block_horiz_16_16:
+@@ -55,15 +52,15 @@ loop1a:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -77,8 +74,8 @@ loop1a:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -235,15 +232,15 @@ loop1c:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -257,8 +254,8 @@ loop1c:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -304,15 +301,15 @@ loop1c:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -326,8 +323,8 @@ loop1c:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -387,15 +384,15 @@ loop1c1:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -409,8 +406,8 @@ loop1c1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -453,15 +450,15 @@ loop1c1:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -475,8 +472,8 @@ loop1c1:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -630,15 +627,15 @@ loop1e:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -652,8 +649,8 @@ loop1e:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -702,15 +699,15 @@ loop2e:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -724,8 +721,8 @@ loop2e:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -773,15 +770,15 @@ loop2e:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -795,8 +792,8 @@ loop2e:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -856,15 +853,15 @@ loop1e1:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -878,8 +875,8 @@ loop1e1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -925,15 +922,15 @@ loop2e1:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -947,8 +944,8 @@ loop2e1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -993,15 +990,15 @@ loop2e1:
+ movdqa xmm2, [rsi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [rel cdFFFF]
+- pand xmm2, [rel cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [rel cdFFFF]
+- pand xmm3, [rel cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -1015,8 +1012,8 @@ loop2e1:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [rel cdFFFF]
+- pand xmm4, [rel cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -1075,9 +1072,9 @@ loop1f:
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+- psubw xmm1, [rel cw128]
+- psubw xmm2, [rel cw128]
+- psubw xmm3, [rel cw128]
++ psubw xmm1, [lsym(cw128)]
++ psubw xmm2, [lsym(cw128)]
++ psubw xmm3, [lsym(cw128)]
+ psllw xmm1, 5
+ psllw xmm2, 5
+ psllw xmm3, 5
+@@ -1109,8 +1106,8 @@ loop2f:
+ movq xmm3, [rsi + 64 * 1 * 2] ; src[2n + 2]
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+- psubw xmm2, [rel cw128]
+- psubw xmm3, [rel cw128]
++ psubw xmm2, [lsym(cw128)]
++ psubw xmm3, [lsym(cw128)]
+ psllw xmm2, 5
+ psllw xmm3, 5
+ movdqa xmm4, xmm1
+@@ -1142,7 +1139,7 @@ loop2f:
+ movdqa xmm1, xmm3 ; src[2n]
+ movq xmm2, [rsi + 64 * 1] ; src[2n + 1]
+ punpcklbw xmm2, xmm0
+- psubw xmm2, [rel cw128]
++ psubw xmm2, [lsym(cw128)]
+ psllw xmm2, 5
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -1182,7 +1179,7 @@ set_quants_hi:
+ sub rax, 6 - 5
+ movd xmm9, eax
+ imul rax, 16
+- lea rdx, [rel cwa0]
++ lea rdx, [lsym(cwa0)]
+ add rdx, rax
+ movdqa xmm8, [rdx]
+ ret
+@@ -1191,7 +1188,7 @@ set_quants_lo:
+ sub rax, 6 - 5
+ movd xmm11, eax
+ imul rax, 16
+- lea rdx, [rel cwa0]
++ lea rdx, [lsym(cwa0)]
+ add rdx, rax
+ movdqa xmm10, [rdx]
+ ret
+@@ -1355,5 +1352,3 @@ PROC rfxcodec_encode_dwt_shift_amd64_sse
+ pop rdx
+ pop rbx
+ ret
+- align 16
+-
+--- a/librfxcodec/src/common.asm
++++ b/librfxcodec/src/common.asm
+@@ -1,5 +1,6 @@
+ ;
+ ;Copyright 2017 Pavel Roskin
++;Copyright 2017 mirabilos
+ ;
+ ;Permission to use, copy, modify, distribute, and sell this software and its
+ ;documentation for any purpose is hereby granted without fee, provided that
+@@ -49,3 +50,49 @@ section .note.GNU-stack noalloc noexec n
+ _%1:
+ %endif
+ %endmacro
++
++; Macros for relative access to local data
++%undef lsym
++
++%ifdef ASM_ARCH_AMD64
++; amd64; don't define or call RETRIEVE_RODATA
++%define lsym(name) rel name
++; default case for PREPARE_RODATA
++%endif
++
++%ifdef ASM_ARCH_I386
++%ifdef PIC
++; i386 PIC
++%macro PREPARE_RODATA 0
++section .text
++..@get_caller_address:
++ mov ebx, [esp]
++ ret
++align 16
++..@rodata_begin:
++%endmacro
++%macro RETRIEVE_RODATA 0
++ call ..@get_caller_address
++%%the_caller_address:
++ sub ebx, %%the_caller_address - ..@rodata_begin
++%endmacro
++%define lsym(name) ebx + name - ..@rodata_begin
++%else
++; i386 non-PIC; default case for lsym, RETRIEVE_RODATA and PREPARE_RODATA
++%endif
++%endif
++
++%ifndef lsym
++%macro RETRIEVE_RODATA 0
++%endmacro
++%define lsym(name) name
++%endif
++
++%ifnmacro PREPARE_RODATA
++%macro PREPARE_RODATA 0
++section .text
++align 16
++%endmacro
++%endif
++
++section .text
+--- a/librfxcodec/src/x86/Makefile.am
++++ b/librfxcodec/src/x86/Makefile.am
+@@ -1,3 +1,5 @@
++NAFLAGS += -DASM_ARCH_I386
++
+ X86_ASM = \
+ cpuid_x86.asm \
+ rfxcodec_encode_dwt_shift_x86_sse2.asm \
+--- a/librfxcodec/src/x86/cpuid_x86.asm
++++ b/librfxcodec/src/x86/cpuid_x86.asm
+@@ -1,7 +1,5 @@
+ %include "common.asm"
+
+-section .text
+-
+ ;int
+ ;cpuid_x86(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx)
+
+@@ -29,6 +27,4 @@ PROC cpuid_x86
+ pop edx
+ pop ecx
+ pop ebx
+- ret;
+- align 16
+-
++ ret
+--- a/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
++++ b/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
+@@ -1,5 +1,6 @@
+ ;
+ ;Copyright 2016 Jay Sorg
++;Copyright 2017 mirabilos
+ ;
+ ;Permission to use, copy, modify, distribute, and sell this software and its
+ ;documentation for any purpose is hereby granted without fee, provided that
+@@ -21,8 +22,7 @@
... 2730 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-remote/xrdp.git