[pkg-remote-commits] [xrdp] 01/01: Re-enable SIMD code (minus the two files) on any-i386

Thorsten Glaser tg at moszumanska.debian.org
Thu Jan 26 18:31:23 UTC 2017


This is an automated email from the git hooks/post-receive script.

tg pushed a commit to branch experimental
in repository xrdp.

commit adb43db1fe4851d8337f1966e73a0a1ef2539af4
Author: mirabilos <t.glaser at tarent.de>
Date:   Thu Jan 26 19:30:20 2017 +0100

    Re-enable SIMD code (minus the two files) on any-i386
---
 debian/control                         |    3 +-
 debian/patches/i386-pic-asm-part1.diff |    1 +
 debian/patches/i386-pic-asm-part2.diff | 1405 ++++++++++++++++++++++++++++++++
 debian/patches/series                  |    1 +
 debian/rules                           |   23 -
 5 files changed, 1408 insertions(+), 25 deletions(-)

diff --git a/debian/control b/debian/control
index d627eeb..a477588 100644
--- a/debian/control
+++ b/debian/control
@@ -20,8 +20,7 @@ Build-Depends:
  libx11-dev,
  libxfixes-dev,
  libxrandr-dev,
-# for now; see debian/rules for details
- nasm [amd64 kfreebsd-amd64],
+ nasm [amd64 hurd-i386 i386 kfreebsd-amd64 kfreebsd-i386],
  openssl,
  pkg-config,
  systemd [linux-any],
diff --git a/debian/patches/i386-pic-asm-part1.diff b/debian/patches/i386-pic-asm-part1.diff
index 8c8ea4b..d8bc0c5 100644
--- a/debian/patches/i386-pic-asm-part1.diff
+++ b/debian/patches/i386-pic-asm-part1.diff
@@ -1,5 +1,6 @@
 From: Thorsten Glaser <tg at mirbsd.org>
 Subject: Free the ebx register from use of the assembly code (WIP)
+ missing: xorgxrdp/module/x86/ i420_to_rgb32_x86_sse2.asm yv12_to_rgb32_x86_sse2.asm
 
 --- a/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
 +++ b/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
diff --git a/debian/patches/i386-pic-asm-part2.diff b/debian/patches/i386-pic-asm-part2.diff
new file mode 100644
index 0000000..769979c
--- /dev/null
+++ b/debian/patches/i386-pic-asm-part2.diff
@@ -0,0 +1,1405 @@
+From: Thorsten Glaser <tg at mirbsd.org>
+Subject: Convert i386 asm code to ELF PIC (WIP)
+
+--- a/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
++++ b/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
+@@ -1,5 +1,6 @@
+ ;
+ ;Copyright 2016 Jay Sorg
++;Copyright 2017 mirabilos
+ ;
+ ;Permission to use, copy, modify, distribute, and sell this software and its
+ ;documentation for any purpose is hereby granted without fee, provided that
+@@ -21,6 +22,29 @@
+ 
+ %ifidn __OUTPUT_FORMAT__,elf
+ section .note.GNU-stack noalloc noexec nowrite progbits
+%ifdef PIC
+section .text
+extern _GLOBAL_OFFSET_TABLE_
+..@get_GOT:	; "..@" label: not scoped to a preceding non-local label
+	mov ebx,[esp]	; ebx = return address of the calling "call"
+	ret
+%define lsym(name) ebx + name wrt ..gotoff
+%macro get_GOT 0
+	call ..@get_GOT	; returns with ebx = address of the next instruction
+%%getgot: add ebx,_GLOBAL_OFFSET_TABLE_+$$-%%getgot wrt ..gotpc
+%endmacro
+%else
+%endif
++%else
++; not ELF
++%ifdef PIC
++%error Position-Independent Code is currently only supported for ELF
++%endif
++%endif
++%ifndef lsym
++%define lsym(name) name
++%macro get_GOT 0
++%endmacro
+ %endif
+ 
+ section .data
+@@ -68,8 +92,8 @@ loop1a:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     pslld xmm1, 16
+     pslld xmm2, 16
+     psrad xmm1, 16
+@@ -79,8 +103,8 @@ loop1a:
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     pslld xmm2, 16
+     pslld xmm3, 16
+     psrad xmm2, 16
+@@ -98,8 +122,8 @@ loop1a:
+     psrldq xmm5, 12
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     pslld xmm3, 16
+     pslld xmm4, 16
+     psrad xmm3, 16
+@@ -260,8 +284,8 @@ loop1c:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     pslld xmm1, 16
+     pslld xmm2, 16
+     psrad xmm1, 16
+@@ -271,8 +295,8 @@ loop1c:
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     pslld xmm2, 16
+     pslld xmm3, 16
+     psrad xmm2, 16
+@@ -290,8 +314,8 @@ loop1c:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     pslld xmm3, 16
+     pslld xmm4, 16
+     psrad xmm3, 16
+@@ -341,8 +365,8 @@ loop1c:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     pslld xmm1, 16
+     pslld xmm2, 16
+     psrad xmm1, 16
+@@ -352,8 +376,8 @@ loop1c:
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     pslld xmm2, 16
+     pslld xmm3, 16
+     psrad xmm2, 16
+@@ -371,8 +395,8 @@ loop1c:
+     psrldq xmm5, 12
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     pslld xmm3, 16
+     pslld xmm4, 16
+     psrad xmm3, 16
+@@ -436,8 +460,8 @@ loop1c1:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     pslld xmm1, 16
+     pslld xmm2, 16
+     psrad xmm1, 16
+@@ -447,8 +471,8 @@ loop1c1:
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     pslld xmm2, 16
+     pslld xmm3, 16
+     psrad xmm2, 16
+@@ -466,8 +490,8 @@ loop1c1:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     pslld xmm3, 16
+     pslld xmm4, 16
+     psrad xmm3, 16
+@@ -514,8 +538,8 @@ loop1c1:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     pslld xmm1, 16
+     pslld xmm2, 16
+     psrad xmm1, 16
+@@ -525,8 +549,8 @@ loop1c1:
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     pslld xmm2, 16
+     pslld xmm3, 16
+     psrad xmm2, 16
+@@ -544,8 +568,8 @@ loop1c1:
+     psrldq xmm5, 12
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     pslld xmm3, 16
+     pslld xmm4, 16
+     psrad xmm3, 16
+@@ -703,8 +727,8 @@ loop1e:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     pslld xmm1, 16
+     pslld xmm2, 16
+     psrad xmm1, 16
+@@ -714,8 +738,8 @@ loop1e:
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     pslld xmm2, 16
+     pslld xmm3, 16
+     psrad xmm2, 16
+@@ -733,8 +757,8 @@ loop1e:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     pslld xmm3, 16
+     pslld xmm4, 16
+     psrad xmm3, 16
+@@ -787,8 +811,8 @@ loop2e:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     pslld xmm1, 16
+     pslld xmm2, 16
+     psrad xmm1, 16
+@@ -798,8 +822,8 @@ loop2e:
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     pslld xmm2, 16
+     pslld xmm3, 16
+     psrad xmm2, 16
+@@ -817,8 +841,8 @@ loop2e:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     pslld xmm3, 16
+     pslld xmm4, 16
+     psrad xmm3, 16
+@@ -870,8 +894,8 @@ loop2e:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     pslld xmm1, 16
+     pslld xmm2, 16
+     psrad xmm1, 16
+@@ -881,8 +905,8 @@ loop2e:
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     pslld xmm2, 16
+     pslld xmm3, 16
+     psrad xmm2, 16
+@@ -900,8 +924,8 @@ loop2e:
+     psrldq xmm5, 12
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     pslld xmm3, 16
+     pslld xmm4, 16
+     psrad xmm3, 16
+@@ -965,8 +989,8 @@ loop1e1:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     pslld xmm1, 16
+     pslld xmm2, 16
+     psrad xmm1, 16
+@@ -976,8 +1000,8 @@ loop1e1:
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     pslld xmm2, 16
+     pslld xmm3, 16
+     psrad xmm2, 16
+@@ -995,8 +1019,8 @@ loop1e1:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     pslld xmm3, 16
+     pslld xmm4, 16
+     psrad xmm3, 16
+@@ -1046,8 +1070,8 @@ loop2e1:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     pslld xmm1, 16
+     pslld xmm2, 16
+     psrad xmm1, 16
+@@ -1057,8 +1081,8 @@ loop2e1:
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     pslld xmm2, 16
+     pslld xmm3, 16
+     psrad xmm2, 16
+@@ -1076,8 +1100,8 @@ loop2e1:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     pslld xmm3, 16
+     pslld xmm4, 16
+     psrad xmm3, 16
+@@ -1126,8 +1150,8 @@ loop2e1:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     pslld xmm1, 16
+     pslld xmm2, 16
+     psrad xmm1, 16
+@@ -1137,8 +1161,8 @@ loop2e1:
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     pslld xmm2, 16
+     pslld xmm3, 16
+     psrad xmm2, 16
+@@ -1156,8 +1180,8 @@ loop2e1:
+     psrldq xmm5, 12
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     pslld xmm3, 16
+     pslld xmm4, 16
+     psrad xmm3, 16
+@@ -1220,9 +1244,9 @@ loop1f:
+     punpcklbw xmm1, xmm0
+     punpcklbw xmm2, xmm0
+     punpcklbw xmm3, xmm0
+-    psubw xmm1, [cw128]
+-    psubw xmm2, [cw128]
+-    psubw xmm3, [cw128]
++    psubw xmm1, [lsym(cw128)]
++    psubw xmm2, [lsym(cw128)]
++    psubw xmm3, [lsym(cw128)]
+     psllw xmm1, 5
+     psllw xmm2, 5
+     psllw xmm3, 5
+@@ -1254,8 +1278,8 @@ loop2f:
+     movq xmm3, [esi + 64 * 1 * 2]       ; src[2n + 2]
+     punpcklbw xmm2, xmm0
+     punpcklbw xmm3, xmm0
+-    psubw xmm2, [cw128]
+-    psubw xmm3, [cw128]
++    psubw xmm2, [lsym(cw128)]
++    psubw xmm3, [lsym(cw128)]
+     psllw xmm2, 5
+     psllw xmm3, 5
+     movdqa xmm4, xmm1
+@@ -1287,7 +1311,7 @@ loop2f:
+     movdqa xmm1, xmm3                   ; src[2n]
+     movq xmm2, [esi + 64 * 1]           ; src[2n + 1]
+     punpcklbw xmm2, xmm0
+-    psubw xmm2, [cw128]
++    psubw xmm2, [lsym(cw128)]
+     psllw xmm2, 5
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -1328,7 +1352,7 @@ set_quants_hi:
+     movd xmm1, eax
+     movdqa LHI_SFT, xmm1
+     imul eax, 16
+-    lea edx, [cwa0]
++    lea edx, [lsym(cwa0)]
+     add edx, eax
+     movdqa xmm1, [edx]
+     movdqa LHI_ADD, xmm1
+@@ -1339,7 +1363,7 @@ set_quants_lo:
+     movd xmm1, eax
+     movdqa LLO_SFT, xmm1
+     imul eax, 16
+-    lea edx, [cwa0]
++    lea edx, [lsym(cwa0)]
+     add edx, eax
+     movdqa xmm1, [edx]
+     movdqa LLO_ADD, xmm1
+@@ -1375,6 +1399,7 @@ PROC _rfxcodec_encode_dwt_shift_x86_sse2
+     movdqu [esp], xmm0
+     ; save registers
+     push ebx
++    get_GOT
+     push esi
+     push edi
+     push ebp
+--- a/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse41.asm
++++ b/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse41.asm
+@@ -1,5 +1,6 @@
+ ;
+ ;Copyright 2016 Jay Sorg
++;Copyright 2017 mirabilos
+ ;
+ ;Permission to use, copy, modify, distribute, and sell this software and its
+ ;documentation for any purpose is hereby granted without fee, provided that
+@@ -21,6 +22,29 @@
+ 
+ %ifidn __OUTPUT_FORMAT__,elf
+ section .note.GNU-stack noalloc noexec nowrite progbits
+%ifdef PIC
+section .text
+extern _GLOBAL_OFFSET_TABLE_
+..@get_GOT:	; "..@" label: not scoped to a preceding non-local label
+	mov ebx,[esp]	; ebx = return address of the calling "call"
+	ret
+%define lsym(name) ebx + name wrt ..gotoff
+%macro get_GOT 0
+	call ..@get_GOT	; returns with ebx = address of the next instruction
+%%getgot: add ebx,_GLOBAL_OFFSET_TABLE_+$$-%%getgot wrt ..gotpc
+%endmacro
+%else
+%endif
++%else
++; not ELF
++%ifdef PIC
++%error Position-Independent Code is currently only supported for ELF
++%endif
++%endif
++%ifndef lsym
++%define lsym(name) name
++%macro get_GOT 0
++%endmacro
+ %endif
+ 
+ section .data
+@@ -68,15 +92,15 @@ loop1a:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     packusdw xmm1, xmm2
+     movdqa xmm2, xmm6                   ; src[2n + 1]
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     packusdw xmm2, xmm3
+     movdqa xmm3, xmm6                   ; src[2n + 2]
+     movdqa xmm4, xmm7
+@@ -90,8 +114,8 @@ loop1a:
+     psrldq xmm5, 12
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     packusdw xmm3, xmm4
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -248,15 +272,15 @@ loop1c:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     packusdw xmm1, xmm2
+     movdqa xmm2, xmm6                   ; src[2n + 1]
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     packusdw xmm2, xmm3
+     movdqa xmm3, xmm6                   ; src[2n + 2]
+     movdqa xmm4, xmm7
+@@ -270,8 +294,8 @@ loop1c:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     packusdw xmm3, xmm4
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -317,15 +341,15 @@ loop1c:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     packusdw xmm1, xmm2
+     movdqa xmm2, xmm6                   ; src[2n + 1]
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     packusdw xmm2, xmm3
+     movdqa xmm3, xmm6                   ; src[2n + 2]
+     movdqa xmm4, xmm7
+@@ -339,8 +363,8 @@ loop1c:
+     psrldq xmm5, 12
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     packusdw xmm3, xmm4
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -400,15 +424,15 @@ loop1c1:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     packusdw xmm1, xmm2
+     movdqa xmm2, xmm6                   ; src[2n + 1]
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     packusdw xmm2, xmm3
+     movdqa xmm3, xmm6                   ; src[2n + 2]
+     movdqa xmm4, xmm7
+@@ -422,8 +446,8 @@ loop1c1:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     packusdw xmm3, xmm4
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -466,15 +490,15 @@ loop1c1:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     packusdw xmm1, xmm2
+     movdqa xmm2, xmm6                   ; src[2n + 1]
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     packusdw xmm2, xmm3
+     movdqa xmm3, xmm6                   ; src[2n + 2]
+     movdqa xmm4, xmm7
+@@ -488,8 +512,8 @@ loop1c1:
+     psrldq xmm5, 12
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     packusdw xmm3, xmm4
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -643,15 +667,15 @@ loop1e:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     packusdw xmm1, xmm2
+     movdqa xmm2, xmm6                   ; src[2n + 1]
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     packusdw xmm2, xmm3
+     movdqa xmm3, xmm6                   ; src[2n + 2]
+     movdqa xmm4, xmm7
+@@ -665,8 +689,8 @@ loop1e:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     packusdw xmm3, xmm4
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -715,15 +739,15 @@ loop2e:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     packusdw xmm1, xmm2
+     movdqa xmm2, xmm6                   ; src[2n + 1]
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     packusdw xmm2, xmm3
+     movdqa xmm3, xmm6                   ; src[2n + 2]
+     movdqa xmm4, xmm7
+@@ -737,8 +761,8 @@ loop2e:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     packusdw xmm3, xmm4
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -786,15 +810,15 @@ loop2e:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     packusdw xmm1, xmm2
+     movdqa xmm2, xmm6                   ; src[2n + 1]
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     packusdw xmm2, xmm3
+     movdqa xmm3, xmm6                   ; src[2n + 2]
+     movdqa xmm4, xmm7
+@@ -808,8 +832,8 @@ loop2e:
+     psrldq xmm5, 12
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     packusdw xmm3, xmm4
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -869,15 +893,15 @@ loop1e1:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     packusdw xmm1, xmm2
+     movdqa xmm2, xmm6                   ; src[2n + 1]
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     packusdw xmm2, xmm3
+     movdqa xmm3, xmm6                   ; src[2n + 2]
+     movdqa xmm4, xmm7
+@@ -891,8 +915,8 @@ loop1e1:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     packusdw xmm3, xmm4
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -938,15 +962,15 @@ loop2e1:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     packusdw xmm1, xmm2
+     movdqa xmm2, xmm6                   ; src[2n + 1]
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     packusdw xmm2, xmm3
+     movdqa xmm3, xmm6                   ; src[2n + 2]
+     movdqa xmm4, xmm7
+@@ -960,8 +984,8 @@ loop2e1:
+     movd xmm5, eax
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     packusdw xmm3, xmm4
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -1006,15 +1030,15 @@ loop2e1:
+     movdqa xmm2, [esi + 16]
+     movdqa xmm6, xmm1
+     movdqa xmm7, xmm2
+-    pand xmm1, [cdFFFF]
+-    pand xmm2, [cdFFFF]
++    pand xmm1, [lsym(cdFFFF)]
++    pand xmm2, [lsym(cdFFFF)]
+     packusdw xmm1, xmm2
+     movdqa xmm2, xmm6                   ; src[2n + 1]
+     movdqa xmm3, xmm7
+     psrldq xmm2, 2
+     psrldq xmm3, 2
+-    pand xmm2, [cdFFFF]
+-    pand xmm3, [cdFFFF]
++    pand xmm2, [lsym(cdFFFF)]
++    pand xmm3, [lsym(cdFFFF)]
+     packusdw xmm2, xmm3
+     movdqa xmm3, xmm6                   ; src[2n + 2]
+     movdqa xmm4, xmm7
+@@ -1028,8 +1052,8 @@ loop2e1:
+     psrldq xmm5, 12
+     pslldq xmm5, 12
+     por xmm4, xmm5
+-    pand xmm3, [cdFFFF]
+-    pand xmm4, [cdFFFF]
++    pand xmm3, [lsym(cdFFFF)]
++    pand xmm4, [lsym(cdFFFF)]
+     packusdw xmm3, xmm4
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -1088,9 +1112,9 @@ loop1f:
+     punpcklbw xmm1, xmm0
+     punpcklbw xmm2, xmm0
+     punpcklbw xmm3, xmm0
+-    psubw xmm1, [cw128]
+-    psubw xmm2, [cw128]
+-    psubw xmm3, [cw128]
++    psubw xmm1, [lsym(cw128)]
++    psubw xmm2, [lsym(cw128)]
++    psubw xmm3, [lsym(cw128)]
+     psllw xmm1, 5
+     psllw xmm2, 5
+     psllw xmm3, 5
+@@ -1122,8 +1146,8 @@ loop2f:
+     movq xmm3, [esi + 64 * 1 * 2]       ; src[2n + 2]
+     punpcklbw xmm2, xmm0
+     punpcklbw xmm3, xmm0
+-    psubw xmm2, [cw128]
+-    psubw xmm3, [cw128]
++    psubw xmm2, [lsym(cw128)]
++    psubw xmm3, [lsym(cw128)]
+     psllw xmm2, 5
+     psllw xmm3, 5
+     movdqa xmm4, xmm1
+@@ -1155,7 +1179,7 @@ loop2f:
+     movdqa xmm1, xmm3                   ; src[2n]
+     movq xmm2, [esi + 64 * 1]           ; src[2n + 1]
+     punpcklbw xmm2, xmm0
+-    psubw xmm2, [cw128]
++    psubw xmm2, [lsym(cw128)]
+     psllw xmm2, 5
+     movdqa xmm4, xmm1
+     movdqa xmm5, xmm2
+@@ -1196,7 +1220,7 @@ set_quants_hi:
+     movd xmm1, eax
+     movdqa LHI_SFT, xmm1
+     imul eax, 16
+-    lea edx, [cwa0]
++    lea edx, [lsym(cwa0)]
+     add edx, eax
+     movdqa xmm1, [edx]
+     movdqa LHI_ADD, xmm1
+@@ -1207,7 +1231,7 @@ set_quants_lo:
+     movd xmm1, eax
+     movdqa LLO_SFT, xmm1
+     imul eax, 16
+-    lea edx, [cwa0]
++    lea edx, [lsym(cwa0)]
+     add edx, eax
+     movdqa xmm1, [edx]
+     movdqa LLO_ADD, xmm1
+@@ -1243,6 +1267,7 @@ PROC _rfxcodec_encode_dwt_shift_x86_sse4
+     movdqu [esp], xmm0
+     ; save registers
+     push ebx
++    get_GOT
+     push esi
+     push edi
+     push ebp
+--- a/xorgxrdp/module/x86/a8r8g8b8_to_a8b8g8r8_box_x86_sse2.asm
++++ b/xorgxrdp/module/x86/a8r8g8b8_to_a8b8g8r8_box_x86_sse2.asm
+@@ -1,5 +1,6 @@
+ ;
+ ;Copyright 2014 Jay Sorg
++;Copyright 2017 mirabilos
+ ;
+ ;Permission to use, copy, modify, distribute, and sell this software and its
+ ;documentation for any purpose is hereby granted without fee, provided that
+@@ -22,7 +23,30 @@
+ ;
+ 
+ %ifidn __OUTPUT_FORMAT__,elf
+-SECTION .note.GNU-stack noalloc noexec nowrite progbits
++section .note.GNU-stack noalloc noexec nowrite progbits
+%ifdef PIC
+section .text
+extern _GLOBAL_OFFSET_TABLE_
+..@get_GOT:	; "..@" label: not scoped to a preceding non-local label
+	mov ebx,[esp]	; ebx = return address of the calling "call"
+	ret
+%define lsym(name) ebx + name wrt ..gotoff
+%macro get_GOT 0
+	call ..@get_GOT	; returns with ebx = address of the next instruction
+%%getgot: add ebx,_GLOBAL_OFFSET_TABLE_+$$-%%getgot wrt ..gotpc
+%endmacro
+%else
+%endif
++%else
++; not ELF
++%ifdef PIC
++%error Position-Independent Code is currently only supported for ELF
++%endif
++%endif
++%ifndef lsym
++%define lsym(name) name
++%macro get_GOT 0
++%endmacro
+ %endif
+ 
+ SECTION .data
+@@ -49,13 +73,14 @@ PROC a8r8g8b8_to_a8b8g8r8_box_x86_sse2
+ PROC _a8r8g8b8_to_a8b8g8r8_box_x86_sse2
+ %endif
+     push ebx
++    get_GOT
+     push esi
+     push edi
+     push ebp
+ 
+-    movdqa xmm4, [c1]
+-    movdqa xmm5, [c2]
+-    movdqa xmm6, [c3]
... 492 lines suppressed ...

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-remote/xrdp.git



More information about the pkg-remote-commits mailing list