[pkg-remote-commits] [xrdp] 01/01: Re-enable SIMD code (minus the two files) on any-i386
Thorsten Glaser
tg at moszumanska.debian.org
Thu Jan 26 18:31:23 UTC 2017
This is an automated email from the git hooks/post-receive script.
tg pushed a commit to branch experimental
in repository xrdp.
commit adb43db1fe4851d8337f1966e73a0a1ef2539af4
Author: mirabilos <t.glaser at tarent.de>
Date: Thu Jan 26 19:30:20 2017 +0100
Re-enable SIMD code (minus the two files) on any-i386
---
debian/control | 3 +-
debian/patches/i386-pic-asm-part1.diff | 1 +
debian/patches/i386-pic-asm-part2.diff | 1405 ++++++++++++++++++++++++++++++++
debian/patches/series | 1 +
debian/rules | 23 -
5 files changed, 1408 insertions(+), 25 deletions(-)
diff --git a/debian/control b/debian/control
index d627eeb..a477588 100644
--- a/debian/control
+++ b/debian/control
@@ -20,8 +20,7 @@ Build-Depends:
libx11-dev,
libxfixes-dev,
libxrandr-dev,
-# for now; see debian/rules for details
- nasm [amd64 kfreebsd-amd64],
+ nasm [amd64 hurd-i386 i386 kfreebsd-amd64 kfreebsd-i386],
openssl,
pkg-config,
systemd [linux-any],
diff --git a/debian/patches/i386-pic-asm-part1.diff b/debian/patches/i386-pic-asm-part1.diff
index 8c8ea4b..d8bc0c5 100644
--- a/debian/patches/i386-pic-asm-part1.diff
+++ b/debian/patches/i386-pic-asm-part1.diff
@@ -1,5 +1,6 @@
From: Thorsten Glaser <tg at mirbsd.org>
Subject: Free the ebx register from use of the assembly code (WIP)
+ missing: xorgxrdp/module/x86/ i420_to_rgb32_x86_sse2.asm yv12_to_rgb32_x86_sse2.asm
--- a/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
+++ b/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
diff --git a/debian/patches/i386-pic-asm-part2.diff b/debian/patches/i386-pic-asm-part2.diff
new file mode 100644
index 0000000..769979c
--- /dev/null
+++ b/debian/patches/i386-pic-asm-part2.diff
@@ -0,0 +1,1405 @@
+From: Thorsten Glaser <tg at mirbsd.org>
+Subject: Convert i386 asm code to ELF PIC (WIP)
+
+--- a/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
++++ b/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
+@@ -1,5 +1,6 @@
+ ;
+ ;Copyright 2016 Jay Sorg
++;Copyright 2017 mirabilos
+ ;
+ ;Permission to use, copy, modify, distribute, and sell this software and its
+ ;documentation for any purpose is hereby granted without fee, provided that
+@@ -21,6 +22,29 @@
+
+ %ifidn __OUTPUT_FORMAT__,elf
+ section .note.GNU-stack noalloc noexec nowrite progbits
++%ifdef PIC
++section .text
++extern _GLOBAL_OFFSET_TABLE_
++..@get_GOT: ; PIC thunk: returns its caller's return address in ebx; "..@" keeps the label outside NASM's local-label scoping so the macro can reach it from any PROC
++ mov ebx,[esp] ; [esp] = return address pushed by the call (plain NASM operand, no MASM "ptr")
++ ret
++%define lsym(name) ebx + name wrt ..gotoff
++%macro get_GOT 0
++ call ..@get_GOT ; ebx = address of %%got_pc (the instruction following this call)
++%%got_pc: add ebx,_GLOBAL_OFFSET_TABLE_+$$-%%got_pc wrt ..gotpc ; ebx = GOT base; the offset must be taken from the label ebx actually points at
++%endmacro
++%else
++%endif
++%else
++; not ELF
++%ifdef PIC
++%error Position-Independent Code is currently only supported for ELF
++%endif
++%endif
++%ifndef lsym
++%define lsym(name) name
++%macro get_GOT 0
++%endmacro
+ %endif
+
+ section .data
+@@ -68,8 +92,8 @@ loop1a:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -79,8 +103,8 @@ loop1a:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -98,8 +122,8 @@ loop1a:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -260,8 +284,8 @@ loop1c:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -271,8 +295,8 @@ loop1c:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -290,8 +314,8 @@ loop1c:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -341,8 +365,8 @@ loop1c:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -352,8 +376,8 @@ loop1c:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -371,8 +395,8 @@ loop1c:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -436,8 +460,8 @@ loop1c1:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -447,8 +471,8 @@ loop1c1:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -466,8 +490,8 @@ loop1c1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -514,8 +538,8 @@ loop1c1:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -525,8 +549,8 @@ loop1c1:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -544,8 +568,8 @@ loop1c1:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -703,8 +727,8 @@ loop1e:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -714,8 +738,8 @@ loop1e:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -733,8 +757,8 @@ loop1e:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -787,8 +811,8 @@ loop2e:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -798,8 +822,8 @@ loop2e:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -817,8 +841,8 @@ loop2e:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -870,8 +894,8 @@ loop2e:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -881,8 +905,8 @@ loop2e:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -900,8 +924,8 @@ loop2e:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -965,8 +989,8 @@ loop1e1:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -976,8 +1000,8 @@ loop1e1:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -995,8 +1019,8 @@ loop1e1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -1046,8 +1070,8 @@ loop2e1:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -1057,8 +1081,8 @@ loop2e1:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -1076,8 +1100,8 @@ loop2e1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -1126,8 +1150,8 @@ loop2e1:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ pslld xmm1, 16
+ pslld xmm2, 16
+ psrad xmm1, 16
+@@ -1137,8 +1161,8 @@ loop2e1:
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ pslld xmm2, 16
+ pslld xmm3, 16
+ psrad xmm2, 16
+@@ -1156,8 +1180,8 @@ loop2e1:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ pslld xmm3, 16
+ pslld xmm4, 16
+ psrad xmm3, 16
+@@ -1220,9 +1244,9 @@ loop1f:
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+- psubw xmm1, [cw128]
+- psubw xmm2, [cw128]
+- psubw xmm3, [cw128]
++ psubw xmm1, [lsym(cw128)]
++ psubw xmm2, [lsym(cw128)]
++ psubw xmm3, [lsym(cw128)]
+ psllw xmm1, 5
+ psllw xmm2, 5
+ psllw xmm3, 5
+@@ -1254,8 +1278,8 @@ loop2f:
+ movq xmm3, [esi + 64 * 1 * 2] ; src[2n + 2]
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+- psubw xmm2, [cw128]
+- psubw xmm3, [cw128]
++ psubw xmm2, [lsym(cw128)]
++ psubw xmm3, [lsym(cw128)]
+ psllw xmm2, 5
+ psllw xmm3, 5
+ movdqa xmm4, xmm1
+@@ -1287,7 +1311,7 @@ loop2f:
+ movdqa xmm1, xmm3 ; src[2n]
+ movq xmm2, [esi + 64 * 1] ; src[2n + 1]
+ punpcklbw xmm2, xmm0
+- psubw xmm2, [cw128]
++ psubw xmm2, [lsym(cw128)]
+ psllw xmm2, 5
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -1328,7 +1352,7 @@ set_quants_hi:
+ movd xmm1, eax
+ movdqa LHI_SFT, xmm1
+ imul eax, 16
+- lea edx, [cwa0]
++ lea edx, [lsym(cwa0)]
+ add edx, eax
+ movdqa xmm1, [edx]
+ movdqa LHI_ADD, xmm1
+@@ -1339,7 +1363,7 @@ set_quants_lo:
+ movd xmm1, eax
+ movdqa LLO_SFT, xmm1
+ imul eax, 16
+- lea edx, [cwa0]
++ lea edx, [lsym(cwa0)]
+ add edx, eax
+ movdqa xmm1, [edx]
+ movdqa LLO_ADD, xmm1
+@@ -1375,6 +1399,7 @@ PROC _rfxcodec_encode_dwt_shift_x86_sse2
+ movdqu [esp], xmm0
+ ; save registers
+ push ebx
++ get_GOT
+ push esi
+ push edi
+ push ebp
+--- a/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse41.asm
++++ b/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse41.asm
+@@ -1,5 +1,6 @@
+ ;
+ ;Copyright 2016 Jay Sorg
++;Copyright 2017 mirabilos
+ ;
+ ;Permission to use, copy, modify, distribute, and sell this software and its
+ ;documentation for any purpose is hereby granted without fee, provided that
+@@ -21,6 +22,29 @@
+
+ %ifidn __OUTPUT_FORMAT__,elf
+ section .note.GNU-stack noalloc noexec nowrite progbits
++%ifdef PIC
++section .text
++extern _GLOBAL_OFFSET_TABLE_
++..@get_GOT: ; PIC thunk: returns its caller's return address in ebx; "..@" keeps the label outside NASM's local-label scoping so the macro can reach it from any PROC
++ mov ebx,[esp] ; [esp] = return address pushed by the call (plain NASM operand, no MASM "ptr")
++ ret
++%define lsym(name) ebx + name wrt ..gotoff
++%macro get_GOT 0
++ call ..@get_GOT ; ebx = address of %%got_pc (the instruction following this call)
++%%got_pc: add ebx,_GLOBAL_OFFSET_TABLE_+$$-%%got_pc wrt ..gotpc ; ebx = GOT base; the offset must be taken from the label ebx actually points at
++%endmacro
++%else
++%endif
++%else
++; not ELF
++%ifdef PIC
++%error Position-Independent Code is currently only supported for ELF
++%endif
++%endif
++%ifndef lsym
++%define lsym(name) name
++%macro get_GOT 0
++%endmacro
+ %endif
+
+ section .data
+@@ -68,15 +92,15 @@ loop1a:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -90,8 +114,8 @@ loop1a:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -248,15 +272,15 @@ loop1c:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -270,8 +294,8 @@ loop1c:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -317,15 +341,15 @@ loop1c:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -339,8 +363,8 @@ loop1c:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -400,15 +424,15 @@ loop1c1:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -422,8 +446,8 @@ loop1c1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -466,15 +490,15 @@ loop1c1:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -488,8 +512,8 @@ loop1c1:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -643,15 +667,15 @@ loop1e:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -665,8 +689,8 @@ loop1e:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -715,15 +739,15 @@ loop2e:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -737,8 +761,8 @@ loop2e:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -786,15 +810,15 @@ loop2e:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -808,8 +832,8 @@ loop2e:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -869,15 +893,15 @@ loop1e1:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -891,8 +915,8 @@ loop1e1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -938,15 +962,15 @@ loop2e1:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -960,8 +984,8 @@ loop2e1:
+ movd xmm5, eax
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -1006,15 +1030,15 @@ loop2e1:
+ movdqa xmm2, [esi + 16]
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm2
+- pand xmm1, [cdFFFF]
+- pand xmm2, [cdFFFF]
++ pand xmm1, [lsym(cdFFFF)]
++ pand xmm2, [lsym(cdFFFF)]
+ packusdw xmm1, xmm2
+ movdqa xmm2, xmm6 ; src[2n + 1]
+ movdqa xmm3, xmm7
+ psrldq xmm2, 2
+ psrldq xmm3, 2
+- pand xmm2, [cdFFFF]
+- pand xmm3, [cdFFFF]
++ pand xmm2, [lsym(cdFFFF)]
++ pand xmm3, [lsym(cdFFFF)]
+ packusdw xmm2, xmm3
+ movdqa xmm3, xmm6 ; src[2n + 2]
+ movdqa xmm4, xmm7
+@@ -1028,8 +1052,8 @@ loop2e1:
+ psrldq xmm5, 12
+ pslldq xmm5, 12
+ por xmm4, xmm5
+- pand xmm3, [cdFFFF]
+- pand xmm4, [cdFFFF]
++ pand xmm3, [lsym(cdFFFF)]
++ pand xmm4, [lsym(cdFFFF)]
+ packusdw xmm3, xmm4
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -1088,9 +1112,9 @@ loop1f:
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+- psubw xmm1, [cw128]
+- psubw xmm2, [cw128]
+- psubw xmm3, [cw128]
++ psubw xmm1, [lsym(cw128)]
++ psubw xmm2, [lsym(cw128)]
++ psubw xmm3, [lsym(cw128)]
+ psllw xmm1, 5
+ psllw xmm2, 5
+ psllw xmm3, 5
+@@ -1122,8 +1146,8 @@ loop2f:
+ movq xmm3, [esi + 64 * 1 * 2] ; src[2n + 2]
+ punpcklbw xmm2, xmm0
+ punpcklbw xmm3, xmm0
+- psubw xmm2, [cw128]
+- psubw xmm3, [cw128]
++ psubw xmm2, [lsym(cw128)]
++ psubw xmm3, [lsym(cw128)]
+ psllw xmm2, 5
+ psllw xmm3, 5
+ movdqa xmm4, xmm1
+@@ -1155,7 +1179,7 @@ loop2f:
+ movdqa xmm1, xmm3 ; src[2n]
+ movq xmm2, [esi + 64 * 1] ; src[2n + 1]
+ punpcklbw xmm2, xmm0
+- psubw xmm2, [cw128]
++ psubw xmm2, [lsym(cw128)]
+ psllw xmm2, 5
+ movdqa xmm4, xmm1
+ movdqa xmm5, xmm2
+@@ -1196,7 +1220,7 @@ set_quants_hi:
+ movd xmm1, eax
+ movdqa LHI_SFT, xmm1
+ imul eax, 16
+- lea edx, [cwa0]
++ lea edx, [lsym(cwa0)]
+ add edx, eax
+ movdqa xmm1, [edx]
+ movdqa LHI_ADD, xmm1
+@@ -1207,7 +1231,7 @@ set_quants_lo:
+ movd xmm1, eax
+ movdqa LLO_SFT, xmm1
+ imul eax, 16
+- lea edx, [cwa0]
++ lea edx, [lsym(cwa0)]
+ add edx, eax
+ movdqa xmm1, [edx]
+ movdqa LLO_ADD, xmm1
+@@ -1243,6 +1267,7 @@ PROC _rfxcodec_encode_dwt_shift_x86_sse4
+ movdqu [esp], xmm0
+ ; save registers
+ push ebx
++ get_GOT
+ push esi
+ push edi
+ push ebp
+--- a/xorgxrdp/module/x86/a8r8g8b8_to_a8b8g8r8_box_x86_sse2.asm
++++ b/xorgxrdp/module/x86/a8r8g8b8_to_a8b8g8r8_box_x86_sse2.asm
+@@ -1,5 +1,6 @@
+ ;
+ ;Copyright 2014 Jay Sorg
++;Copyright 2017 mirabilos
+ ;
+ ;Permission to use, copy, modify, distribute, and sell this software and its
+ ;documentation for any purpose is hereby granted without fee, provided that
+@@ -22,7 +23,30 @@
+ ;
+
+ %ifidn __OUTPUT_FORMAT__,elf
+-SECTION .note.GNU-stack noalloc noexec nowrite progbits
++section .note.GNU-stack noalloc noexec nowrite progbits
++%ifdef PIC
++section .text
++extern _GLOBAL_OFFSET_TABLE_
++..@get_GOT: ; PIC thunk: returns its caller's return address in ebx; "..@" keeps the label outside NASM's local-label scoping so the macro can reach it from any PROC
++ mov ebx,[esp] ; [esp] = return address pushed by the call (plain NASM operand, no MASM "ptr")
++ ret
++%define lsym(name) ebx + name wrt ..gotoff
++%macro get_GOT 0
++ call ..@get_GOT ; ebx = address of %%got_pc (the instruction following this call)
++%%got_pc: add ebx,_GLOBAL_OFFSET_TABLE_+$$-%%got_pc wrt ..gotpc ; ebx = GOT base; the offset must be taken from the label ebx actually points at
++%endmacro
++%else
++%endif
++%else
++; not ELF
++%ifdef PIC
++%error Position-Independent Code is currently only supported for ELF
++%endif
++%endif
++%ifndef lsym
++%define lsym(name) name
++%macro get_GOT 0
++%endmacro
+ %endif
+
+ SECTION .data
+@@ -49,13 +73,14 @@ PROC a8r8g8b8_to_a8b8g8r8_box_x86_sse2
+ PROC _a8r8g8b8_to_a8b8g8r8_box_x86_sse2
+ %endif
+ push ebx
++ get_GOT
+ push esi
+ push edi
+ push ebp
+
+- movdqa xmm4, [c1]
+- movdqa xmm5, [c2]
+- movdqa xmm6, [c3]
... 492 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-remote/xrdp.git
More information about the pkg-remote-commits
mailing list