Commit 6b67f7d6 authored by Carlos Eduardo Seo's avatar Carlos Eduardo Seo

cmd/internal/obj/ppc64: add support for DQ-form instructions

POWER9 (ISA 3.0) introduced a new format of load/store instructions to
implement indexed load/store quadword, using an immediate value instead
of a register index.

This change adds support for this new instruction encoding and adds the
new load/store quadword instructions (lxv/stxv) to the assembler.

This change also adds the missing XX1-form loads/stores (halfword and byte)
included in ISA 3.0.

Change-Id: Ibcdf53c342d7a352d64a9403c2fe7b25be9c3b24
Reviewed-on: https://go-review.googlesource.com/c/go/+/200399
Run-TryBot: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarLynn Boger <laboger@linux.vnet.ibm.com>
parent d2c039fb
......@@ -1011,20 +1011,34 @@ label1:
// <MNEMONIC> (RB)(RA*1),XT produces
// <mnemonic> XT,RA,RB
LXVD2X (R1)(R2*1), VS0
LXVDSX (R1)(R2*1), VS0
LXVW4X (R1)(R2*1), VS0
LXVH8X (R1)(R2*1), VS0
LXVB16X (R1)(R2*1), VS0
LXVDSX (R1)(R2*1), VS0
LXSDX (R1)(R2*1), VS0
LXSIWAX (R1)(R2*1), VS0
LXSIWZX (R1)(R2*1), VS0
// VSX load, DQ-form
// <MNEMONIC> DQ(RA), XS produces
// <mnemonic> XS, DQ(RA)
LXV 32752(R1), VS0
// VSX store, XX1-form
// <MNEMONIC> XS,(RB)(RA*1) produces
// <mnemonic> XS,RA,RB
STXVD2X VS63, (R1)(R2*1)
STXVW4X VS63, (R1)(R2*1)
STXVH8X VS63, (R1)(R2*1)
STXVB16X VS63, (R1)(R2*1)
STXSDX VS63, (R1)(R2*1)
STXSIWX VS63, (R1)(R2*1)
// VSX store, DQ-form
// <MNEMONIC> DQ(RA), XS produces
// <mnemonic> XS, DQ(RA)
STXV VS63, -32752(R1)
// VSX move from VSR, XX1-form
// <MNEMONIC> XS,RA produces
// <mnemonic> RA,XS
......
......@@ -935,11 +935,15 @@ const (
/* VSX */
ALXV
ALXVD2X
ALXVDSX
ALXVW4X
ALXVH8X
ALXVB16X
ALXVDSX
ASTXV
ASTXVD2X
ASTXVW4X
ASTXVH8X
ASTXVB16X
ALXS
ALXSDX
ASTXS
......
......@@ -523,11 +523,15 @@ var Anames = []string{
"VMRGOW",
"LXV",
"LXVD2X",
"LXVDSX",
"LXVW4X",
"LXVH8X",
"LXVB16X",
"LXVDSX",
"STXV",
"STXVD2X",
"STXVW4X",
"STXVH8X",
"STXVB16X",
"LXS",
"LXSDX",
"STXS",
......
......@@ -476,10 +476,12 @@ var optab = []Optab{
{AVSHASIGMA, C_ANDCON, C_VREG, C_ANDCON, C_VREG, 82, 4, 0}, /* vector SHA sigma, vx-form */
/* VSX vector load */
{ALXV, C_SOREG, C_NONE, C_NONE, C_VSREG, 87, 4, 0}, /* vsx vector load, xx1-form */
{ALXVD2X, C_SOREG, C_NONE, C_NONE, C_VSREG, 87, 4, 0}, /* vsx vector load, xx1-form */
{ALXV, C_SOREG, C_NONE, C_NONE, C_VSREG, 96, 4, 0}, /* vsx vector load, dq-form */
/* VSX vector store */
{ASTXV, C_VSREG, C_NONE, C_NONE, C_SOREG, 86, 4, 0}, /* vsx vector store, xx1-form */
{ASTXVD2X, C_VSREG, C_NONE, C_NONE, C_SOREG, 86, 4, 0}, /* vsx vector store, xx1-form */
{ASTXV, C_VSREG, C_NONE, C_NONE, C_SOREG, 97, 4, 0}, /* vsx vector store, dq-form */
/* VSX scalar load */
{ALXS, C_SOREG, C_NONE, C_NONE, C_VSREG, 87, 4, 0}, /* vsx scalar load, xx1-form */
......@@ -1542,14 +1544,22 @@ func buildop(ctxt *obj.Link) {
opset(AVSHASIGMAW, r0)
opset(AVSHASIGMAD, r0)
case ALXV: /* lxvd2x, lxvdsx, lxvw4x */
opset(ALXVD2X, r0)
case ALXVD2X: /* lxvd2x, lxvdsx, lxvw4x, lxvh8x, lxvb16x */
opset(ALXVDSX, r0)
opset(ALXVW4X, r0)
opset(ALXVH8X, r0)
opset(ALXVB16X, r0)
case ASTXV: /* stxvd2x, stxvdsx, stxvw4x */
opset(ASTXVD2X, r0)
case ALXV: /* lxv */
opset(ALXV, r0)
case ASTXVD2X: /* stxvd2x, stxvdsx, stxvw4x, stxvh8x, stxvb16x */
opset(ASTXVW4X, r0)
opset(ASTXVH8X, r0)
opset(ASTXVB16X, r0)
case ASTXV: /* stxv */
opset(ASTXV, r0)
case ALXS: /* lxsdx */
opset(ALXSDX, r0)
......@@ -1981,6 +1991,10 @@ func OPVXX4(o uint32, xo uint32, oe uint32) uint32 {
return o<<26 | xo<<4 | oe<<11
}
func OPDQ(o uint32, xo uint32, oe uint32) uint32 {
return o<<26 | xo | oe<<4
}
func OPVX(o uint32, xo uint32, oe uint32, rc uint32) uint32 {
return o<<26 | xo | oe<<11 | rc&1
}
......@@ -2080,6 +2094,21 @@ func AOP_XX4(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
return op | (xt&31)<<21 | (xa&31)<<16 | (xb&31)<<11 | (xc&31)<<6 | (xc&32)>>2 | (xa&32)>>3 | (xb&32)>>4 | (xt&32)>>5
}
/* DQ-form, VSR register, register + offset operands */
func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 {
/* For the DQ-form encodings, we need the VSX register number to be exactly */
/* between 0-63, so we can properly set the SX bit. */
r := d - REG_VS0
/* The EA for this instruction form is (RA) + DQ << 4, where DQ is a 12-bit signed integer. */
/* In order to match the output of the GNU objdump (and make the usage in Go asm easier), the */
/* instruction is called using the sign extended value (i.e. a valid offset would be -32752 or 32752, */
/* not -2047 or 2047), so 'b' needs to be adjusted to the expected 12-bit DQ value. Bear in mind that */
/* bits 0 to 3 in 'dq' need to be zero, otherwise this will generate an illegal instruction. */
/* If in doubt how this instruction form is encoded, refer to ISA 3.0b, pages 492 and 507. */
dq := b >> 4
return op | (r&31)<<21 | (a&31)<<16 | (dq&4095)<<4 | (r&32)>>2
}
/* Z23-form, 3-register operands + CY field */
func AOP_Z23I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 {
return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&3)<<7
......@@ -3686,6 +3715,24 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
rel.Siz = 8
rel.Sym = p.From.Sym
rel.Type = objabi.R_ADDRPOWER_TOCREL_DS
case 96: /* VSX load, DQ-form */
/* reg imm reg */
/* operand order: (RA)(DQ), XT */
dq := int16(c.regoff(&p.From))
if (dq & 15) != 0 {
c.ctxt.Diag("invalid offset for DQ form load/store %v", dq)
}
o1 = AOP_DQ(c.opload(p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(dq))
case 97: /* VSX store, DQ-form */
/* reg imm reg */
/* operand order: XT, (RA)(DQ) */
dq := int16(c.regoff(&p.To))
if (dq & 15) != 0 {
c.ctxt.Diag("invalid offset for DQ form load/store %v", dq)
}
o1 = AOP_DQ(c.opstore(p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(dq))
}
out[0] = o1
......@@ -4888,6 +4935,8 @@ func (c *ctxt9) opload(a obj.As) uint32 {
return OPVCC(33, 0, 0, 0) /* lwzu */
case AMOVW:
return OPVCC(58, 0, 0, 0) | 1<<1 /* lwa */
case ALXV:
return OPDQ(61, 1, 0) /* lxv - ISA v3.00 */
/* no AMOVWU */
case AMOVB, AMOVBZ:
......@@ -5007,14 +5056,16 @@ func (c *ctxt9) oploadx(a obj.As) uint32 {
/* ISA 2.06 enables these for POWER7. */
case ALXVD2X:
return OPVXX1(31, 844, 0) /* lxvd2x - v2.06 */
case ALXVDSX:
return OPVXX1(31, 332, 0) /* lxvdsx - v2.06 */
case ALXVW4X:
return OPVXX1(31, 780, 0) /* lxvw4x - v2.06 */
case ALXVH8X:
return OPVXX1(31, 812, 0) /* lxvh8x - v3.00 */
case ALXVB16X:
return OPVXX1(31, 876, 0) /* lxvb16x - v3.00 */
case ALXVDSX:
return OPVXX1(31, 332, 0) /* lxvdsx - v2.06 */
case ALXSDX:
return OPVXX1(31, 588, 0) /* lxsdx - v2.06 */
case ALXSIWAX:
return OPVXX1(31, 76, 0) /* lxsiwax - v2.07 */
case ALXSIWZX:
......@@ -5065,6 +5116,8 @@ func (c *ctxt9) opstore(a obj.As) uint32 {
return OPVCC(62, 0, 0, 0) /* std */
case AMOVDU:
return OPVCC(62, 0, 0, 1) /* stdu */
case ASTXV:
return OPDQ(61, 5, 0) /* stxv */
}
c.ctxt.Diag("unknown store opcode %v", a)
......@@ -5145,12 +5198,17 @@ func (c *ctxt9) opstorex(a obj.As) uint32 {
return OPVXX1(31, 972, 0) /* stxvd2x - v2.06 */
case ASTXVW4X:
return OPVXX1(31, 908, 0) /* stxvw4x - v2.06 */
case ASTXVH8X:
return OPVXX1(31, 940, 0) /* stxvh8x - v3.00 */
case ASTXVB16X:
return OPVXX1(31, 1004, 0) /* stxvb16x - v3.00 */
case ASTXSDX:
return OPVXX1(31, 716, 0) /* stxsdx - v2.06 */
case ASTXSIWX:
return OPVXX1(31, 140, 0) /* stxsiwx - v2.07 */
/* End of vector scalar instructions */
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment