[pkg-golang-devel] Bug#874317: golang-1.9: Please add patch to reintroduce POWER5 support

John Paul Adrian Glaubitz glaubitz at physik.fu-berlin.de
Mon Sep 4 23:45:36 UTC 2017


Source: golang-1.9
Version: 1.9-1
Severity: normal
Tags: patch
User: debian-powerpc at lists.debian.org
Usertags: ppc64

Hi!

Starting with golang-1.9, upstream decided to drop support for
POWER5 on big-endian ppc64 systems and raised the minimum
instruction set for these systems to POWER8.

Since Debian's ppc64 port is still, and always will be, based
on POWER5, I have decided to revert the changes in question
to make golang-1.9 work on POWER5. Luckily, the changes in
question were actually just code clean-ups and simplifications,
none of which had any actual performance impact on little-endian
ppc64 systems.

On a side note: Raising the instruction set level for the big-
endian ppc64 port to POWER8 never actually made any sense, as
every available Linux distribution actually uses POWER5
on big-endian ppc64 systems. If users want to use POWER8,
they have to install a little-endian ppc64 port, which is
what most users want anyway due to the improved level of
compatibility with most existing applications. No one will
buy a POWER8-capable machine and run a big-endian ppc64
port on it; there is simply no use case for the changes
upstream introduced.

Thus, it would be great if you could incorporate this patch
into the golang-1.9 Debian package to make it build on ppc64
again. I'm aware that some IBM folk might not agree with
this change, but I think it makes sense for Debian and
its users. We just released our first installation image
for ppc64 ever, so I am expecting a larger number of
Debian ppc64 installations in the future.

Thanks for your consideration!

Adrian

--
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaubitz at debian.org
`. `'   Freie Universitaet Berlin - glaubitz at physik.fu-berlin.de
  `-    GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913
-------------- next part --------------
Description: Re-add support for POWER5
 Starting with golang-1.9, upstream dropped support for
 POWER5 on big-endian ppc64 systems to clean up the code
 a bit. This patch reverts a number of changes that upstream
 made to remove POWER5 support for ppc64 big-endian. This
 change does not have any negative impact on ppc64 little-
 endian targets but it will allow us to continue using
 golang-1.9 on ppc64 big-endian a little longer.
Author: John Paul Adrian Glaubitz <glaubitz at physik.fu-berlin.de>
Upstream: https://github.com/golang/go/issues/19074
Last-Update: 2017-09-04

Index: golang-1.9-1.9/src/cmd/compile/internal/ppc64/ssa.go
===================================================================
--- golang-1.9-1.9.orig/src/cmd/compile/internal/ppc64/ssa.go
+++ golang-1.9-1.9/src/cmd/compile/internal/ppc64/ssa.go
@@ -13,6 +13,20 @@ import (
 	"math"
 )
 
+var condOps = map[ssa.Op]obj.As{ // SSA comparison op -> conditional branch emitted on the OldArch (non-ISEL) path
+	ssa.OpPPC64Equal:        ppc64.ABEQ,
+	ssa.OpPPC64NotEqual:     ppc64.ABNE,
+	ssa.OpPPC64LessThan:     ppc64.ABLT,
+	ssa.OpPPC64GreaterEqual: ppc64.ABGE,
+	ssa.OpPPC64GreaterThan:  ppc64.ABGT,
+	ssa.OpPPC64LessEqual:    ppc64.ABLE,
+
+	ssa.OpPPC64FLessThan:     ppc64.ABLT, // FCMP <: 1 branch
+	ssa.OpPPC64FGreaterThan:  ppc64.ABGT, // FCMP >: 1 branch
+	ssa.OpPPC64FLessEqual:    ppc64.ABLT, // FCMP <=: first of 2 branches; the code generator emits BEQ as the second
+	ssa.OpPPC64FGreaterEqual: ppc64.ABGT, // FCMP >=: first of 2 branches; the code generator emits BEQ as the second
+}
+
 // iselOp encodes mapping of comparison operations onto ISEL operands
 type iselOp struct {
 	cond        int64
@@ -760,6 +774,27 @@ func ssaGenValue(s *gc.SSAGenState, v *s
 		//   rtmp := 1
 		//   isel rt,0,rtmp,!cond // rt is target in ppc asm
 
+		if v.Block.Func.Config.OldArch {
+			p := s.Prog(ppc64.AMOVD)
+			p.From.Type = obj.TYPE_CONST
+			p.From.Offset = 1
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = v.Reg()
+
+			pb := s.Prog(condOps[v.Op])
+			pb.To.Type = obj.TYPE_BRANCH
+
+			p = s.Prog(ppc64.AMOVD)
+			p.From.Type = obj.TYPE_CONST
+			p.From.Offset = 0
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = v.Reg()
+
+			p = s.Prog(obj.ANOP)
+			gc.Patch(pb, p)
+			break
+		}
+		// Modern PPC uses ISEL
 		p := s.Prog(ppc64.AMOVD)
 		p.From.Type = obj.TYPE_CONST
 		p.From.Offset = 1
@@ -771,6 +806,30 @@ func ssaGenValue(s *gc.SSAGenState, v *s
 	case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion
 		ssa.OpPPC64FGreaterEqual:
 
+		if v.Block.Func.Config.OldArch {
+			p := s.Prog(ppc64.AMOVW)
+			p.From.Type = obj.TYPE_CONST
+			p.From.Offset = 1
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = v.Reg()
+
+			pb0 := s.Prog(condOps[v.Op])
+			pb0.To.Type = obj.TYPE_BRANCH
+			pb1 := s.Prog(ppc64.ABEQ)
+			pb1.To.Type = obj.TYPE_BRANCH
+
+			p = s.Prog(ppc64.AMOVW)
+			p.From.Type = obj.TYPE_CONST
+			p.From.Offset = 0
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = v.Reg()
+
+			p = s.Prog(obj.ANOP)
+			gc.Patch(pb0, p)
+			gc.Patch(pb1, p)
+			break
+		}
+		// Modern PPC uses ISEL
 		p := s.Prog(ppc64.AMOVD)
 		p.From.Type = obj.TYPE_CONST
 		p.From.Offset = 1
Index: golang-1.9-1.9/src/cmd/compile/internal/ssa/config.go
===================================================================
--- golang-1.9-1.9.orig/src/cmd/compile/internal/ssa/config.go
+++ golang-1.9-1.9/src/cmd/compile/internal/ssa/config.go
@@ -35,6 +35,7 @@ type Config struct {
 	noDuffDevice    bool          // Don't use Duff's device
 	nacl            bool          // GOOS=nacl
 	use387          bool          // GO386=387
+	OldArch         bool          // True for older versions of architecture, e.g. true for PPC64BE, false for PPC64LE
 	NeedsFpScratch  bool          // No direct move between GP and FP register sets
 	BigEndian       bool          //
 	sparsePhiCutoff uint64        // Sparse phi location algorithm used above this #blocks*#variables score
@@ -200,6 +201,7 @@ func NewConfig(arch string, types Types,
 		c.hasGReg = true
 		c.noDuffDevice = objabi.GOOS == "darwin" // darwin linker cannot handle BR26 reloc with non-zero addend
 	case "ppc64":
+		c.OldArch = true
 		c.BigEndian = true
 		fallthrough
 	case "ppc64le":
Index: golang-1.9-1.9/src/math/big/arith_ppc64.s
===================================================================
--- /dev/null
+++ golang-1.9-1.9/src/math/big/arith_ppc64.s
@@ -0,0 +1,14 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !math_big_pure_go,ppc64
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+TEXT ·divWW(SB), NOSPLIT, $0
+	BR ·divWW_g(SB)
+
Index: golang-1.9-1.9/src/math/big/arith_ppc64le.s
===================================================================
--- /dev/null
+++ golang-1.9-1.9/src/math/big/arith_ppc64le.s
@@ -0,0 +1,50 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !math_big_pure_go,ppc64le
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+// func divWW(x1, x0, y Word) (q, r Word)
+TEXT ·divWW(SB), NOSPLIT, $0
+	MOVD x1+0(FP), R4
+	MOVD x0+8(FP), R5
+	MOVD y+16(FP), R6
+
+	CMPU R4, R6
+	BGE  divbigger // x1 >= y: store -1 in both q and r
+
+	// from the programmer's note in ch. 3 of the ISA manual, p.74
+	DIVDEU R6, R4, R3
+	DIVDU  R6, R5, R7
+	MULLD  R6, R3, R8
+	MULLD  R6, R7, R20
+	SUB    R20, R5, R10
+	ADD    R7, R3, R3
+	SUB    R8, R10, R4
+	CMPU   R4, R10
+	BLT    adjust
+	CMPU   R4, R6
+	BLT    end
+
+adjust: // bump the quotient by one and subtract y from the remainder
+	MOVD $1, R21
+	ADD  R21, R3, R3
+	SUB  R6, R4, R4
+
+end:
+	MOVD R3, q+24(FP)
+	MOVD R4, r+32(FP)
+
+	RET
+
+divbigger:
+	MOVD $-1, R7
+	MOVD R7, q+24(FP)
+	MOVD R7, r+32(FP)
+	RET
+
Index: golang-1.9-1.9/src/math/big/arith_ppc64x.s
===================================================================
--- golang-1.9-1.9.orig/src/math/big/arith_ppc64x.s
+++ golang-1.9-1.9/src/math/big/arith_ppc64x.s
@@ -198,44 +198,5 @@ end:
 	MOVD R4, c+56(FP)
 	RET
 
-// func divWW(x1, x0, y Word) (q, r Word)
-TEXT ·divWW(SB), NOSPLIT, $0
-	MOVD x1+0(FP), R4
-	MOVD x0+8(FP), R5
-	MOVD y+16(FP), R6
-
-	CMPU R4, R6
-	BGE  divbigger
-
-	// from the programmer's note in ch. 3 of the ISA manual, p.74
-	DIVDEU R6, R4, R3
-	DIVDU  R6, R5, R7
-	MULLD  R6, R3, R8
-	MULLD  R6, R7, R20
-	SUB    R20, R5, R10
-	ADD    R7, R3, R3
-	SUB    R8, R10, R4
-	CMPU   R4, R10
-	BLT    adjust
-	CMPU   R4, R6
-	BLT    end
-
-adjust:
-	MOVD $1, R21
-	ADD  R21, R3, R3
-	SUB  R6, R4, R4
-
-end:
-	MOVD R3, q+24(FP)
-	MOVD R4, r+32(FP)
-
-	RET
-
-divbigger:
-	MOVD $-1, R7
-	MOVD R7, q+24(FP)
-	MOVD R7, r+32(FP)
-	RET
-
 TEXT ·divWVW(SB), NOSPLIT, $0
 	BR ·divWVW_g(SB)
Index: golang-1.9-1.9/src/runtime/internal/atomic/asm_ppc64x.s
===================================================================
--- golang-1.9-1.9.orig/src/runtime/internal/atomic/asm_ppc64x.s
+++ golang-1.9-1.9/src/runtime/internal/atomic/asm_ppc64x.s
@@ -165,12 +165,32 @@ TEXT runtime∕internal∕atomic·Store6
 TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-9
 	MOVD	ptr+0(FP), R3
 	MOVBZ	val+8(FP), R4
+#ifdef  GOARCH_ppc64
+	// Align ptr down to 4 bytes so we can use 32-bit load/store.
+	// R5 = (R3 << 0) & ~3
+	RLDCR	$0, R3, $~3, R5
+	// Compute val shift.
+	// Big endian.  ptr = ptr ^ 3
+	XOR	$3, R3
+	// R6 = ((ptr & 3) * 8) = (ptr << 3) & (3*8)
+	RLDC	$3, R3, $(3*8), R6
+	// Shift val for aligned ptr.  R4 = val << R6
+	SLD	R6, R4, R4
+	SYNC
+
+again:
+	LWAR	(R5), R6
+	OR	R4, R6
+	STWCCC	R6, (R5)
+	BNE	again
+#else
 	SYNC
 again:
 	LBAR	(R3), R6
 	OR	R4, R6
 	STBCCC	R6, (R3)
 	BNE	again
+#endif
 	ISYNC
 	RET
 
@@ -178,11 +198,34 @@ again:
 TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
 	MOVD	ptr+0(FP), R3
 	MOVBZ	val+8(FP), R4
+#ifdef  GOARCH_ppc64
+	// Align ptr down to 4 bytes so we can use 32-bit load/store.
+	// R5 = (R3 << 0) & ~3
+	RLDCR	$0, R3, $~3, R5
+	// Compute val shift.
+	// Big endian.  ptr = ptr ^ 3
+	XOR	$3, R3
+	// R6 = ((ptr & 3) * 8) = (ptr << 3) & (3*8)
+	RLDC	$3, R3, $(3*8), R6
+	// Shift val for aligned ptr.  R4 = val << R6 | ^(0xFF << R6)
+	MOVD	$0xFF, R7
+	SLD	R6, R4
+	SLD	R6, R7
+	XOR	$-1, R7
+	OR	R7, R4
+	SYNC
+again:
+	LWAR	(R5), R6
+	AND	R4, R6
+	STWCCC	R6, (R5)
+	BNE	again
+#else
 	SYNC
 again:
 	LBAR	(R3),R6
 	AND	R4,R6
 	STBCCC	R6,(R3)
 	BNE	again
+#endif
 	ISYNC
 	RET


More information about the pkg-golang-devel mailing list