Commit c1d06cef authored by Russ Cox

cmd/8l: add SSE2 instructions

R=ken
CC=golang-dev
https://golang.org/cl/6610065
parent d749783f
...@@ -461,6 +461,112 @@ enum as ...@@ -461,6 +461,112 @@ enum as
AUNDEF, AUNDEF,
// SSE2
AADDPD,
AADDPS,
AADDSD,
AADDSS,
AANDNPD,
AANDNPS,
AANDPD,
AANDPS,
ACMPPD,
ACMPPS,
ACMPSD,
ACMPSS,
ACOMISD,
ACOMISS,
ACVTPL2PD,
ACVTPL2PS,
ACVTPD2PL,
ACVTPD2PS,
ACVTPS2PL,
ACVTPS2PD,
ACVTSD2SL,
ACVTSD2SS,
ACVTSL2SD,
ACVTSL2SS,
ACVTSS2SD,
ACVTSS2SL,
ACVTTPD2PL,
ACVTTPS2PL,
ACVTTSD2SL,
ACVTTSS2SL,
ADIVPD,
ADIVPS,
ADIVSD,
ADIVSS,
AMASKMOVOU,
AMAXPD,
AMAXPS,
AMAXSD,
AMAXSS,
AMINPD,
AMINPS,
AMINSD,
AMINSS,
AMOVAPD,
AMOVAPS,
AMOVO,
AMOVOU,
AMOVHLPS,
AMOVHPD,
AMOVHPS,
AMOVLHPS,
AMOVLPD,
AMOVLPS,
AMOVMSKPD,
AMOVMSKPS,
AMOVNTO,
AMOVNTPD,
AMOVNTPS,
AMOVSD,
AMOVSS,
AMOVUPD,
AMOVUPS,
AMULPD,
AMULPS,
AMULSD,
AMULSS,
AORPD,
AORPS,
APADDQ,
APMAXSW,
APMAXUB,
APMINSW,
APMINUB,
APSADBW,
APSUBB,
APSUBL,
APSUBQ,
APSUBSB,
APSUBSW,
APSUBUSB,
APSUBUSW,
APSUBW,
APUNPCKHQDQ,
APUNPCKLQDQ,
ARCPPS,
ARCPSS,
ARSQRTPS,
ARSQRTSS,
ASQRTPD,
ASQRTPS,
ASQRTSD,
ASQRTSS,
ASUBPD,
ASUBPS,
ASUBSD,
ASUBSS,
AUCOMISD,
AUCOMISS,
AUNPCKHPD,
AUNPCKHPS,
AUNPCKLPD,
AUNPCKLPS,
AXORPD,
AXORPS,
ALAST ALAST
}; };
...@@ -505,17 +611,26 @@ enum ...@@ -505,17 +611,26 @@ enum
D_DR = 43, D_DR = 43,
D_TR = 51, D_TR = 51,
D_NONE = 59, D_X0 = 59,
D_X1,
D_BRANCH = 60, D_X2,
D_EXTERN = 61, D_X3,
D_STATIC = 62, D_X4,
D_AUTO = 63, D_X5,
D_PARAM = 64, D_X6,
D_CONST = 65, D_X7,
D_FCONST = 66,
D_SCONST = 67, D_NONE = 67,
D_ADDR = 68,
D_BRANCH = 68,
D_EXTERN = 69,
D_STATIC = 70,
D_AUTO = 71,
D_PARAM = 72,
D_CONST = 73,
D_FCONST = 74,
D_SCONST = 75,
D_ADDR = 76,
D_FILE, D_FILE,
D_FILE1, D_FILE1,
......
...@@ -203,6 +203,8 @@ enum ...@@ -203,6 +203,8 @@ enum
Ycr0, Ycr1, Ycr2, Ycr3, Ycr4, Ycr5, Ycr6, Ycr7, Ycr0, Ycr1, Ycr2, Ycr3, Ycr4, Ycr5, Ycr6, Ycr7,
Ydr0, Ydr1, Ydr2, Ydr3, Ydr4, Ydr5, Ydr6, Ydr7, Ydr0, Ydr1, Ydr2, Ydr3, Ydr4, Ydr5, Ydr6, Ydr7,
Ytr0, Ytr1, Ytr2, Ytr3, Ytr4, Ytr5, Ytr6, Ytr7, Ytr0, Ytr1, Ytr2, Ytr3, Ytr4, Ytr5, Ytr6, Ytr7,
Ymr, Ymm,
Yxr, Yxm,
Ymax, Ymax,
Zxxx = 0, Zxxx = 0,
...@@ -224,10 +226,14 @@ enum ...@@ -224,10 +226,14 @@ enum
Zloop, Zloop,
Zm_o, Zm_o,
Zm_r, Zm_r,
Zm_r_xm,
Zm_r_i_xm,
Zaut_r, Zaut_r,
Zo_m, Zo_m,
Zpseudo, Zpseudo,
Zr_m, Zr_m,
Zr_m_xm,
Zr_m_i_xm,
Zrp_, Zrp_,
Z_ib, Z_ib,
Z_il, Z_il,
...@@ -245,6 +251,8 @@ enum ...@@ -245,6 +251,8 @@ enum
Pm = 0x0f, /* 2byte opcode escape */ Pm = 0x0f, /* 2byte opcode escape */
Pq = 0xff, /* both escape */ Pq = 0xff, /* both escape */
Pb = 0xfe, /* byte operands */ Pb = 0xfe, /* byte operands */
Pf2 = 0xf2, /* xmm escape 1 */
Pf3 = 0xf3, /* xmm escape 2 */
}; };
#pragma varargck type "A" int #pragma varargck type "A" int
......
...@@ -254,6 +254,15 @@ char* regstr[] = ...@@ -254,6 +254,15 @@ char* regstr[] =
"TR5", "TR5",
"TR6", "TR6",
"TR7", "TR7",
"X0",
"X1",
"X2",
"X3",
"X4",
"X5",
"X6",
"X7",
"NONE", /* [D_NONE] */ "NONE", /* [D_NONE] */
}; };
......
...@@ -657,6 +657,13 @@ loop: ...@@ -657,6 +657,13 @@ loop:
case AFDIVRF: case AFDIVRF:
case AFCOMF: case AFCOMF:
case AFCOMFP: case AFCOMFP:
case AMOVSS:
case AADDSS:
case ASUBSS:
case AMULSS:
case ADIVSS:
case ACOMISS:
case AUCOMISS:
if(skip) if(skip)
goto casdef; goto casdef;
if(p->from.type == D_FCONST) { if(p->from.type == D_FCONST) {
...@@ -683,6 +690,13 @@ loop: ...@@ -683,6 +690,13 @@ loop:
case AFDIVRD: case AFDIVRD:
case AFCOMD: case AFCOMD:
case AFCOMDP: case AFCOMDP:
case AMOVSD:
case AADDSD:
case ASUBSD:
case AMULSD:
case ADIVSD:
case ACOMISD:
case AUCOMISD:
if(skip) if(skip)
goto casdef; goto casdef;
if(p->from.type == D_FCONST) { if(p->from.type == D_FCONST) {
......
...@@ -356,6 +356,79 @@ uchar ysvrs[] = ...@@ -356,6 +356,79 @@ uchar ysvrs[] =
Ym, Ynone, Zm_o, 2, Ym, Ynone, Zm_o, 2,
0 0
}; };
/*
 * Operand tables for the SSE2 (XMM) instructions.
 *
 * Each row holds four bytes: two operand classes (which appear to be
 * matched against the instruction's from and to operands, in that order),
 * the Z encoding case handled in doasm, and a size byte.  doasm passes the
 * size byte (t[3]) to mediaop as osize: when it is not 1 and the current
 * optab op byte is Pm, Pe, Pf2 or Pf3, mediaop treats that byte as a
 * prefix selector and takes the real opcode from the following op byte.
 * A lone 0 ends each table.
 */

/* xmm/mem source, xmm register destination; single opcode byte */
uchar yxm[] =
{
Yxm, Yxr, Zm_r_xm, 1,
0
};
/* conversions whose destination is either an xmm or an mmx register;
 * each row consumes a prefix byte plus an opcode byte */
uchar yxcvm1[] =
{
Yxm, Yxr, Zm_r_xm, 2,
Yxm, Ymr, Zm_r_xm, 2,
0
};
/* conversions whose source is either xmm/mem or mmx/mem;
 * each row consumes a prefix byte plus an opcode byte */
uchar yxcvm2[] =
{
Yxm, Yxr, Zm_r_xm, 2,
Ymm, Yxr, Zm_r_xm, 2,
0
};
/* same operands as yxm, but with a two-byte (prefix, opcode) op entry */
uchar yxmq[] =
{
Yxm, Yxr, Zm_r_xm, 2,
0
};
/* xmm register to xmm register only (no memory operand) */
uchar yxr[] =
{
Yxr, Yxr, Zm_r_xm, 1,
0
};
/* xmm register source stored to a Yml destination (Zr_m_xm: the to
 * operand is encoded by asmand, the from register goes in the reg field) */
uchar yxr_ml[] =
{
Yxr, Yml, Zr_m_xm, 1,
0
};
/* compare: xmm/mem against xmm register */
uchar yxcmp[] =
{
Yxm, Yxr, Zm_r_xm, 1,
0
};
/* compare with a trailing immediate byte (Zm_r_i_xm appends p->to.offset) */
uchar yxcmpi[] =
{
Yxm, Yxr, Zm_r_i_xm, 2,
0
};
/* moves in both directions: first row loads into an xmm register,
 * second row stores from one; the two rows use successive opcode bytes */
uchar yxmov[] =
{
Yxm, Yxr, Zm_r_xm, 1,
Yxr, Yxm, Zr_m_xm, 1,
0
};
/* xmm/mem source converted into a Yrl (integer register) destination */
uchar yxcvfl[] =
{
Yxm, Yrl, Zm_r_xm, 1,
0
};
/* Yml (integer register/memory) source converted into an xmm register */
uchar yxcvlf[] =
{
Yml, Yxr, Zm_r_xm, 1,
0
};
/* like yxcvfl, with a two-byte (prefix, opcode) op entry */
uchar yxcvfq[] =
{
Yxm, Yrl, Zm_r_xm, 2,
0
};
/* like yxcvlf, with a two-byte (prefix, opcode) op entry */
uchar yxcvqf[] =
{
Yml, Yxr, Zm_r_xm, 2,
0
};
/* xmm register source, Yrl destination, using the plain Zm_r case
 * rather than the mediaop-based Zm_r_xm */
uchar yxrrl[] =
{
Yxr, Yrl, Zm_r, 1,
0
};
uchar yprefetch[] = uchar yprefetch[] =
{ {
Ym, Ynone, Zm_o, 2, Ym, Ynone, Zm_o, 2,
...@@ -782,5 +855,110 @@ Optab optab[] = ...@@ -782,5 +855,110 @@ Optab optab[] =
{ AUNDEF, ynone, Px, 0x0f, 0x0b }, { AUNDEF, ynone, Px, 0x0f, 0x0b },
{ AADDPD, yxm, Pq, 0x58 },
{ AADDPS, yxm, Pm, 0x58 },
{ AADDSD, yxm, Pf2, 0x58 },
{ AADDSS, yxm, Pf3, 0x58 },
{ AANDNPD, yxm, Pq, 0x55 },
{ AANDNPS, yxm, Pm, 0x55 },
{ AANDPD, yxm, Pq, 0x54 },
{ AANDPS, yxm, Pm, 0x54 }, /* 0F 54; Pq here duplicated the AANDPD row and encoded ANDPD */
{ ACMPPD, yxcmpi, Px, Pe,0xc2 },
{ ACMPPS, yxcmpi, Pm, 0xc2,0 },
{ ACMPSD, yxcmpi, Px, Pf2,0xc2 },
{ ACMPSS, yxcmpi, Px, Pf3,0xc2 },
{ ACOMISD, yxcmp, Pe, 0x2f },
{ ACOMISS, yxcmp, Pm, 0x2f },
{ ACVTPL2PD, yxcvm2, Px, Pf3,0xe6,Pe,0x2a },
{ ACVTPL2PS, yxcvm2, Pm, 0x5b,0,0x2a,0, },
{ ACVTPD2PL, yxcvm1, Px, Pf2,0xe6,Pe,0x2d },
{ ACVTPD2PS, yxm, Pe, 0x5a },
{ ACVTPS2PL, yxcvm1, Px, Pe,0x5b,Pm,0x2d },
{ ACVTPS2PD, yxm, Pm, 0x5a },
{ ACVTSD2SL, yxcvfl, Pf2, 0x2d },
{ ACVTSD2SS, yxm, Pf2, 0x5a },
{ ACVTSL2SD, yxcvlf, Pf2, 0x2a },
{ ACVTSL2SS, yxcvlf, Pf3, 0x2a },
{ ACVTSS2SD, yxm, Pf3, 0x5a },
{ ACVTSS2SL, yxcvfl, Pf3, 0x2d },
{ ACVTTPD2PL, yxcvm1, Px, Pe,0xe6,Pe,0x2c },
{ ACVTTPS2PL, yxcvm1, Px, Pf3,0x5b,Pm,0x2c },
{ ACVTTSD2SL, yxcvfl, Pf2, 0x2c },
{ ACVTTSS2SL, yxcvfl, Pf3, 0x2c },
{ ADIVPD, yxm, Pe, 0x5e },
{ ADIVPS, yxm, Pm, 0x5e },
{ ADIVSD, yxm, Pf2, 0x5e },
{ ADIVSS, yxm, Pf3, 0x5e },
{ AMASKMOVOU, yxr, Pe, 0xf7 },
{ AMAXPD, yxm, Pe, 0x5f },
{ AMAXPS, yxm, Pm, 0x5f },
{ AMAXSD, yxm, Pf2, 0x5f },
{ AMAXSS, yxm, Pf3, 0x5f },
{ AMINPD, yxm, Pe, 0x5d },
{ AMINPS, yxm, Pm, 0x5d },
{ AMINSD, yxm, Pf2, 0x5d },
{ AMINSS, yxm, Pf3, 0x5d },
{ AMOVAPD, yxmov, Pe, 0x28,0x29 },
{ AMOVAPS, yxmov, Pm, 0x28,0x29 },
{ AMOVO, yxmov, Pe, 0x6f,0x7f },
{ AMOVOU, yxmov, Pf3, 0x6f,0x7f },
{ AMOVHLPS, yxr, Pm, 0x12 },
{ AMOVHPD, yxmov, Pe, 0x16,0x17 },
{ AMOVHPS, yxmov, Pm, 0x16,0x17 },
{ AMOVLHPS, yxr, Pm, 0x16 },
{ AMOVLPD, yxmov, Pe, 0x12,0x13 },
{ AMOVLPS, yxmov, Pm, 0x12,0x13 },
{ AMOVMSKPD, yxrrl, Pq, 0x50 },
{ AMOVMSKPS, yxrrl, Pm, 0x50 },
{ AMOVNTO, yxr_ml, Pe, 0xe7 },
{ AMOVNTPD, yxr_ml, Pe, 0x2b },
{ AMOVNTPS, yxr_ml, Pm, 0x2b },
{ AMOVSD, yxmov, Pf2, 0x10,0x11 },
{ AMOVSS, yxmov, Pf3, 0x10,0x11 },
{ AMOVUPD, yxmov, Pe, 0x10,0x11 },
{ AMOVUPS, yxmov, Pm, 0x10,0x11 },
{ AMULPD, yxm, Pe, 0x59 },
{ AMULPS, yxm, Pm, 0x59 }, /* prefix slot takes a P* escape; Ym is an operand class */
{ AMULSD, yxm, Pf2, 0x59 },
{ AMULSS, yxm, Pf3, 0x59 },
{ AORPD, yxm, Pq, 0x56 },
{ AORPS, yxm, Pm, 0x56 },
{ APADDQ, yxm, Pe, 0xd4 },
{ APMAXSW, yxm, Pe, 0xee },
{ APMAXUB, yxm, Pe, 0xde },
{ APMINSW, yxm, Pe, 0xea },
{ APMINUB, yxm, Pe, 0xda },
{ APSADBW, yxm, Pq, 0xf6 },
{ APSUBB, yxm, Pe, 0xf8 },
{ APSUBL, yxm, Pe, 0xfa },
{ APSUBQ, yxm, Pe, 0xfb },
{ APSUBSB, yxm, Pe, 0xe8 },
{ APSUBSW, yxm, Pe, 0xe9 },
{ APSUBUSB, yxm, Pe, 0xd8 },
{ APSUBUSW, yxm, Pe, 0xd9 },
{ APSUBW, yxm, Pe, 0xf9 },
{ APUNPCKHQDQ, yxm, Pe, 0x6d },
{ APUNPCKLQDQ, yxm, Pe, 0x6c },
{ ARCPPS, yxm, Pm, 0x53 },
{ ARCPSS, yxm, Pf3, 0x53 },
{ ARSQRTPS, yxm, Pm, 0x52 },
{ ARSQRTSS, yxm, Pf3, 0x52 },
{ ASQRTPD, yxm, Pe, 0x51 },
{ ASQRTPS, yxm, Pm, 0x51 },
{ ASQRTSD, yxm, Pf2, 0x51 },
{ ASQRTSS, yxm, Pf3, 0x51 },
{ ASUBPD, yxm, Pe, 0x5c },
{ ASUBPS, yxm, Pm, 0x5c },
{ ASUBSD, yxm, Pf2, 0x5c },
{ ASUBSS, yxm, Pf3, 0x5c },
{ AUCOMISD, yxcmp, Pe, 0x2e },
{ AUCOMISS, yxcmp, Pm, 0x2e },
{ AUNPCKHPD, yxm, Pe, 0x15 },
{ AUNPCKHPS, yxm, Pm, 0x15 },
{ AUNPCKLPD, yxm, Pe, 0x14 },
{ AUNPCKLPS, yxm, Pm, 0x14 },
{ AXORPD, yxm, Pe, 0x57 },
{ AXORPS, yxm, Pm, 0x57 },
0 0
}; };
...@@ -194,7 +194,7 @@ instinit(void) ...@@ -194,7 +194,7 @@ instinit(void)
for(i=1; optab[i].as; i++) for(i=1; optab[i].as; i++)
if(i != optab[i].as) { if(i != optab[i].as) {
diag("phase error in optab: %d", i); diag("phase error in optab: at %A found %A", i, optab[i].as);
errorexit(); errorexit();
} }
maxop = i; maxop = i;
...@@ -238,6 +238,16 @@ instinit(void) ...@@ -238,6 +238,16 @@ instinit(void)
ycover[Yrl*Ymax + Yml] = 1; ycover[Yrl*Ymax + Yml] = 1;
ycover[Ym*Ymax + Yml] = 1; ycover[Ym*Ymax + Yml] = 1;
ycover[Yax*Ymax + Ymm] = 1;
ycover[Ycx*Ymax + Ymm] = 1;
ycover[Yrx*Ymax + Ymm] = 1;
ycover[Yrl*Ymax + Ymm] = 1;
ycover[Ym*Ymax + Ymm] = 1;
ycover[Ymr*Ymax + Ymm] = 1;
ycover[Ym*Ymax + Yxm] = 1;
ycover[Yxr*Ymax + Yxm] = 1;
for(i=0; i<D_NONE; i++) { for(i=0; i<D_NONE; i++) {
reg[i] = -1; reg[i] = -1;
if(i >= D_AL && i <= D_BH) if(i >= D_AL && i <= D_BH)
...@@ -246,6 +256,8 @@ instinit(void) ...@@ -246,6 +256,8 @@ instinit(void)
reg[i] = (i-D_AX) & 7; reg[i] = (i-D_AX) & 7;
if(i >= D_F0 && i <= D_F0+7) if(i >= D_F0 && i <= D_F0+7)
reg[i] = (i-D_F0) & 7; reg[i] = (i-D_F0) & 7;
if(i >= D_X0 && i <= D_X0+7)
reg[i] = (i-D_X0) & 7;
} }
} }
...@@ -333,6 +345,16 @@ oclass(Adr *a) ...@@ -333,6 +345,16 @@ oclass(Adr *a)
case D_F0+7: case D_F0+7:
return Yrf; return Yrf;
case D_X0+0:
case D_X0+1:
case D_X0+2:
case D_X0+3:
case D_X0+4:
case D_X0+5:
case D_X0+6:
case D_X0+7:
return Yxr;
case D_NONE: case D_NONE:
return Ynone; return Ynone;
...@@ -585,7 +607,7 @@ asmand(Adr *a, int r) ...@@ -585,7 +607,7 @@ asmand(Adr *a, int r)
asmidx(a->scale, a->index, t); asmidx(a->scale, a->index, t);
goto putrelv; goto putrelv;
} }
if(t >= D_AL && t <= D_F0+7) { if(t >= D_AL && t <= D_F7 || t >= D_X0 && t <= D_X7) {
if(v) if(v)
goto bad; goto bad;
*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3); *andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
...@@ -827,6 +849,30 @@ subreg(Prog *p, int from, int to) ...@@ -827,6 +849,30 @@ subreg(Prog *p, int from, int to)
print("%P\n", p); print("%P\n", p);
} }
/*
 * Emit the opcode bytes of a media (MMX/XMM) instruction at andptr.
 *
 * o      optab entry being assembled
 * op     current op byte, o->op[z]
 * osize  size byte from the operand table row
 * z      index of op within o->op
 *
 * When op is one of the prefix selectors (Pm, Pe, Pf2, Pf3) and osize
 * is not 1, op is emitted as a prefix (Pm itself is not repeated), the
 * Pm (0x0f) escape follows, and the real opcode is taken from the next
 * op byte.  Otherwise the Pm escape is emitted only if it is not
 * already the last byte written.  In both cases the opcode byte is
 * written last.  Returns z, advanced by one if an op byte was consumed
 * as a prefix.
 */
static int
mediaop(Optab *o, int op, int osize, int z)
{
	int isprefix;

	isprefix = op == Pm || op == Pe || op == Pf2 || op == Pf3;
	if(isprefix && osize != 1) {
		/* op selects the prefix; the opcode proper is the next op byte */
		if(op != Pm)
			*andptr++ = op;
		*andptr++ = Pm;
		z++;
		op = o->op[z];
	} else {
		/* make sure the escape byte precedes the opcode exactly once */
		if(andptr == and || andptr[-1] != Pm)
			*andptr++ = Pm;
	}
	*andptr++ = op;
	return z;
}
void void
doasm(Prog *p) doasm(Prog *p)
{ {
...@@ -873,6 +919,12 @@ found: ...@@ -873,6 +919,12 @@ found:
*andptr++ = Pm; *andptr++ = Pm;
break; break;
case Pf2: /* xmm opcode escape */
case Pf3:
*andptr++ = o->prefix;
*andptr++ = Pm;
break;
case Pm: /* opcode escape */ case Pm: /* opcode escape */
*andptr++ = Pm; *andptr++ = Pm;
break; break;
...@@ -904,6 +956,17 @@ found: ...@@ -904,6 +956,17 @@ found:
asmand(&p->from, reg[p->to.type]); asmand(&p->from, reg[p->to.type]);
break; break;
case Zm_r_xm:
mediaop(o, op, t[3], z);
asmand(&p->from, reg[p->to.type]);
break;
case Zm_r_i_xm:
mediaop(o, op, t[3], z);
asmand(&p->from, reg[p->to.type]);
*andptr++ = p->to.offset;
break;
case Zaut_r: case Zaut_r:
*andptr++ = 0x8d; /* leal */ *andptr++ = 0x8d; /* leal */
if(p->from.type != D_ADDR) if(p->from.type != D_ADDR)
...@@ -927,6 +990,17 @@ found: ...@@ -927,6 +990,17 @@ found:
asmand(&p->to, reg[p->from.type]); asmand(&p->to, reg[p->from.type]);
break; break;
case Zr_m_xm:
mediaop(o, op, t[3], z);
asmand(&p->to, reg[p->from.type]);
break;
case Zr_m_i_xm:
mediaop(o, op, t[3], z);
asmand(&p->to, reg[p->from.type]);
*andptr++ = p->from.offset;
break;
case Zo_m: case Zo_m:
*andptr++ = op; *andptr++ = op;
asmand(&p->to, o->op[z+1]); asmand(&p->to, o->op[z+1]);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment