[pkg-remote-commits] [xrdp] 01/01: First round of i386 asm patches
Thorsten Glaser
tg at moszumanska.debian.org
Thu Jan 26 17:55:48 UTC 2017
This is an automated email from the git hooks/post-receive script.
tg pushed a commit to branch experimental
in repository xrdp.
commit 5a9b3303d6760bdb1f8103a06ac5ad6cd1218516
Author: mirabilos <t.glaser at tarent.de>
Date: Thu Jan 26 18:55:15 2017 +0100
First round of i386 asm patches
Upstream issues raised in the meantime:
- https://github.com/neutrinolabs/librfxcodec/issues/16
- https://github.com/neutrinolabs/xorgxrdp/issues/67
---
debian/patches/i386-pic-asm-part1.diff | 330 +++++++++++++++++++++++++++++++++
debian/patches/series | 1 +
debian/rules | 17 +-
3 files changed, 340 insertions(+), 8 deletions(-)
diff --git a/debian/patches/i386-pic-asm-part1.diff b/debian/patches/i386-pic-asm-part1.diff
new file mode 100644
index 0000000..8c8ea4b
--- /dev/null
+++ b/debian/patches/i386-pic-asm-part1.diff
@@ -0,0 +1,330 @@
+From: Thorsten Glaser <tg at mirbsd.org>
+Subject: Stop the assembly code from using the ebx register (WIP)
+
+--- a/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
++++ b/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
+@@ -324,7 +324,7 @@ loop1c:
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa xmm6, xmm5 ; out lo
+ paddw xmm6, LLO_ADD
+@@ -395,7 +395,7 @@ loop1c:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+@@ -500,7 +500,7 @@ loop1c1:
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa [edx], xmm5 ; out lo
+
+@@ -568,7 +568,7 @@ loop1c1:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+@@ -767,7 +767,7 @@ loop1e:
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa xmm6, xmm5 ; out lo
+ paddw xmm6, LLO_ADD
+@@ -842,14 +842,14 @@ loop2e:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa xmm6, xmm5 ; out lo
+ paddw xmm6, LLO_ADD
+@@ -924,7 +924,7 @@ loop2e:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+@@ -1029,7 +1029,7 @@ loop1e1:
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa [edx], xmm5 ; out lo
+
+@@ -1101,14 +1101,14 @@ loop2e1:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa [edx], xmm5 ; out lo
+
+@@ -1180,7 +1180,7 @@ loop2e1:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+--- a/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse41.asm
++++ b/librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse41.asm
+@@ -300,7 +300,7 @@ loop1c:
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa xmm6, xmm5 ; out lo
+ paddw xmm6, LLO_ADD
+@@ -359,7 +359,7 @@ loop1c:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+@@ -452,7 +452,7 @@ loop1c1:
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa [edx], xmm5 ; out lo
+
+@@ -508,7 +508,7 @@ loop1c1:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+@@ -695,7 +695,7 @@ loop1e:
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa xmm6, xmm5 ; out lo
+ paddw xmm6, LLO_ADD
+@@ -758,14 +758,14 @@ loop2e:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa xmm6, xmm5 ; out lo
+ paddw xmm6, LLO_ADD
+@@ -828,7 +828,7 @@ loop2e:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+@@ -921,7 +921,7 @@ loop1e1:
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa [edx], xmm5 ; out lo
+
+@@ -981,14 +981,14 @@ loop2e1:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+ paddw xmm5, xmm1
+
+ psrldq xmm2, 14
+- movd ebx, xmm2 ; save hi
++ movd ebp, xmm2 ; save hi
+
+ movdqa [edx], xmm5 ; out lo
+
+@@ -1048,7 +1048,7 @@ loop2e1:
+ ; l[n] = src[2n] + ((h[n - 1] + h[n]) >> 1)
+ movdqa xmm7, xmm5
+ pslldq xmm7, 2
+- movd xmm6, ebx
++ movd xmm6, ebp
+ por xmm7, xmm6
+ paddw xmm5, xmm7
+ psraw xmm5, 1
+--- a/xorgxrdp/module/rdpSimd.c
++++ b/xorgxrdp/module/rdpSimd.c
+@@ -98,8 +98,11 @@ rdpSimdInit(ScreenPtr pScreen, ScrnInfoP
+ "0x%8.8x cx 0x%8.8x dx 0x%8.8x", ax, bx, cx, dx));
+ if (dx & (1 << 26)) /* SSE 2 */
+ {
++#ifndef PIC
++ /*XXX TODO: make these two PIC compatible */
+ dev->yv12_to_rgb32 = yv12_to_rgb32_x86_sse2;
+ dev->i420_to_rgb32 = i420_to_rgb32_x86_sse2;
++#endif
+ dev->yuy2_to_rgb32 = yuy2_to_rgb32_x86_sse2;
+ dev->uyvy_to_rgb32 = uyvy_to_rgb32_x86_sse2;
+ dev->a8r8g8b8_to_a8b8g8r8_box = a8r8g8b8_to_a8b8g8r8_box_x86_sse2;
+--- a/xorgxrdp/module/x86/a8r8g8b8_to_a8b8g8r8_box_x86_sse2.asm
++++ b/xorgxrdp/module/x86/a8r8g8b8_to_a8b8g8r8_box_x86_sse2.asm
+@@ -66,10 +66,10 @@ loop_y:
+ loop_xpre:
+ mov eax, esi ; look for aligned
+ and eax, 0x0F ; we can jump to next
+- mov ebx, eax
++ mov ebp, eax
+ mov eax, edi
+ and eax, 0x0F
+- or eax, ebx
++ or eax, ebp
+ cmp eax, 0
+ je done_loop_xpre
+ cmp ecx, 1
+@@ -78,14 +78,14 @@ loop_xpre:
+ lea esi, [esi + 4]
+ mov edx, eax ; a and g
+ and edx, 0xFF00FF00
+- mov ebx, eax ; r
+- and ebx, 0x00FF0000
+- shr ebx, 16
+- or edx, ebx
+- mov ebx, eax ; b
+- and ebx, 0x000000FF
+- shl ebx, 16
+- or edx, ebx
++ mov ebp, eax ; r
++ and ebp, 0x00FF0000
++ shr ebp, 16
++ or edx, ebp
++ mov ebp, eax ; b
++ and ebp, 0x000000FF
++ shl ebp, 16
++ or edx, ebp
+ mov [edi], edx
+ lea edi, [edi + 4]
+ dec ecx
+@@ -145,14 +145,14 @@ loop_x:
+ lea esi, [esi + 4]
+ mov edx, eax ; a and g
+ and edx, 0xFF00FF00
+- mov ebx, eax ; r
+- and ebx, 0x00FF0000
+- shr ebx, 16
+- or edx, ebx
+- mov ebx, eax ; b
+- and ebx, 0x000000FF
+- shl ebx, 16
+- or edx, ebx
++ mov ebp, eax ; r
++ and ebp, 0x00FF0000
++ shr ebp, 16
++ or edx, ebp
++ mov ebp, eax ; b
++ and ebp, 0x000000FF
++ shl ebp, 16
++ or edx, ebp
+ mov [edi], edx
+ lea edi, [edi + 4]
+ dec ecx
+--- a/xorgxrdp/module/x86/a8r8g8b8_to_nv12_box_x86_sse2.asm
++++ b/xorgxrdp/module/x86/a8r8g8b8_to_nv12_box_x86_sse2.asm
+@@ -88,8 +88,8 @@ PROC _a8r8g8b8_to_nv12_box_x86_sse2
+
+ pxor xmm7, xmm7
+
+- mov ebx, LHEIGHT ; ebx = height
+- shr ebx, 1 ; doing 2 lines at a time
++ mov ebp, LHEIGHT ; ebp = height
++ shr ebp, 1 ; doing 2 lines at a time
+
+ row_loop1:
+ mov esi, LS8 ; s8
+@@ -312,7 +312,7 @@ loop1:
+ add eax, LDST_UV_STRIDE ; d8_uv += dst_stride_uv
+ mov LD8_UV, eax
+
+- dec ebx
++ dec ebp
+ jnz row_loop1
+
+ mov eax, 0 ; return value
+--- a/xorgxrdp/module/x86/i420_to_rgb32_x86_sse2.asm
++++ b/xorgxrdp/module/x86/i420_to_rgb32_x86_sse2.asm
+@@ -1,3 +1,4 @@
++;XXX TODO free ebx, then add PIC
+ ;
+ ;Copyright 2014 Jay Sorg
+ ;
+--- a/xorgxrdp/module/x86/yv12_to_rgb32_x86_sse2.asm
++++ b/xorgxrdp/module/x86/yv12_to_rgb32_x86_sse2.asm
+@@ -1,3 +1,4 @@
++;XXX TODO free ebx, then add PIC
+ ;
+ ;Copyright 2014 Jay Sorg
+ ;
diff --git a/debian/patches/series b/debian/patches/series
index e508bb6..608b592 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -7,3 +7,4 @@ sockpath.diff
kfreebsd.diff
systemd.diff
lfs.diff
+i386-pic-asm-part1.diff
diff --git a/debian/rules b/debian/rules
index 2e77a98..e6d15cc 100755
--- a/debian/rules
+++ b/debian/rules
@@ -28,14 +28,15 @@ endif
ifeq (i386,${DEB_HOST_ARCH_CPU})
# The following files must be rewritten to use PIC if -DPIC is passed:
-# - librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
-# - librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse41.asm
-# - xorgxrdp/module/x86/a8r8g8b8_to_a8b8g8r8_box_x86_sse2.asm
-# - xorgxrdp/module/x86/a8r8g8b8_to_nv12_box_x86_sse2.asm
-# - xorgxrdp/module/x86/i420_to_rgb32_x86_sse2.asm
-# - xorgxrdp/module/x86/uyvy_to_rgb32_x86_sse2.asm
-# - xorgxrdp/module/x86/yuy2_to_rgb32_x86_sse2.asm
-# - xorgxrdp/module/x86/yv12_to_rgb32_x86_sse2.asm
+# - [F] librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse2.asm
+# - [F] librfxcodec/src/x86/rfxcodec_encode_dwt_shift_x86_sse41.asm
+# - [F] xorgxrdp/module/x86/a8r8g8b8_to_a8b8g8r8_box_x86_sse2.asm
+# - [F] xorgxrdp/module/x86/a8r8g8b8_to_nv12_box_x86_sse2.asm
+# - [!] xorgxrdp/module/x86/i420_to_rgb32_x86_sse2.asm
+# - [f] xorgxrdp/module/x86/uyvy_to_rgb32_x86_sse2.asm
+# - [f] xorgxrdp/module/x86/yuy2_to_rgb32_x86_sse2.asm
+# - [!] xorgxrdp/module/x86/yv12_to_rgb32_x86_sse2.asm
+# Key: F = ebx freed, f = ebx was already free, ! = ebx not yet freed; not used from the C code in PIC builds for now
# Documentation: http://www.nasm.us/doc/nasmdoc9.html#section-9.2
# Unfortunately, this requires reserving the EBX register, which
# is used extensively by this code; to avoid crashes or security
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-remote/xrdp.git
More information about the pkg-remote-commits
mailing list