From 8b8a2bd94524b8b79af51c54ef2881e247149fc5 Mon Sep 17 00:00:00 2001 From: Ken Thompson <ken@golang.org> Date: Fri, 7 Aug 2009 20:21:04 -0700 Subject: [PATCH] more work on divide by constant. no real change -- disabled because of bugs R=rsc OCL=32923 CL=32923 --- src/cmd/6g/gg.h | 19 ++++ src/cmd/6g/ggen.c | 219 ++++++++++++++++++++++++++++++--------------- src/cmd/6g/gsubr.c | 195 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 360 insertions(+), 73 deletions(-) diff --git a/src/cmd/6g/gg.h b/src/cmd/6g/gg.h index f9f50cc9d4..6ba975bad2 100644 --- a/src/cmd/6g/gg.h +++ b/src/cmd/6g/gg.h @@ -42,6 +42,23 @@ struct Prog void* reg; // pointer to containing Reg struct }; +typedef struct Magic Magic; +struct Magic // inputs and outputs for smagic() and umagic() +{ + int w; // input for both - width + int s; // output for both - shift + int bad; // output for both - unexpected failure + + // magic multiplier for signed literal divisors + int64 sd; // input - literal divisor + int64 sm; // output - multiplier + + // magic multiplier for unsigned literal divisors + uint64 ud; // input - literal divisor + uint64 um; // output - multiplier + int ua; // output - adder +}; + EXTERN Biobuf* bout; EXTERN int32 dynloc; EXTERN uchar reg[D_NONE]; @@ -127,6 +144,8 @@ void afunclit(Addr*); void datagostring(Strlit*, Addr*); int powtwo(Node*); Type* tounsigned(Type*); +void smagic(Magic*); // smagic (signed) and umagic (unsigned) fill in the output fields of Magic +void umagic(Magic*); /* * obj.c diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c index 4e71f75e5c..b0fa9c7255 100644 --- a/src/cmd/6g/ggen.c +++ b/src/cmd/6g/ggen.c @@ -525,6 +525,36 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx) gmove(dx, res); } +static void +savex(int dr, Node *x, Node *oldx, Node *res, Type *t) // take register dr as x; oldx receives a copy of its old value when one must be kept +{ + int r; + + r = reg[dr]; // read before x is allocated below + + nodreg(x, types[TINT64], dr); + regalloc(x, t, x); + + // save the current value of x if it is live + // and not the destination + memset(oldx, 0, sizeof *oldx); // op == 0 marks oldx as unused for restx + if(r > 0 && !samereg(x, res)) { + regalloc(oldx, t, N); + gmove(x, oldx); + } +} + +static void
+restx(Node *x, Node *oldx) +{ + regfree(x); + + if(oldx->op != 0) { + gmove(oldx, x); + regfree(oldx); + } +} + /* * generate division according to op, one of: * res = nl / nr @@ -533,18 +563,20 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx) void cgen_div(int op, Node *nl, Node *nr, Node *res) { - Node ax, dx, oldax, olddx, n1, n2, n3; - int rax, rdx, n, w; + Node ax, dx, cx, oldax, olddx, oldcx; + Node n1, n2, n3, savl, savr; + int n, w, s; + Magic m; if(nl->ullman >= UINF) { - tempname(&n1, nl->type); - cgen(nl, &n1); - nl = &n1; + tempname(&savl, nl->type); + cgen(nl, &savl); + nl = &savl; } if(nr->ullman >= UINF) { - tempname(&n2, nr->type); - cgen(nr, &n2); - nr = &n2; + tempname(&savr, nr->type); + cgen(nr, &savr); + nr = &savr; } if(nr->op != OLITERAL) @@ -552,8 +584,14 @@ cgen_div(int op, Node *nl, Node *nr, Node *res) // special cases of mod/div // by a constant - n = powtwo(nr); w = nl->type->width*8; + s = 0; + n = powtwo(nr); + if(n >= 1000) { + // negative power of 2: powtwo returns 1000+k when the divisor is the negation of 2^k + s = 1; // negate the result at the end + n -= 1000; + } if(n+1 >= w) { // just sign bit @@ -571,97 +609,132 @@ cgen_div(int op, Node *nl, Node *nr, Node *res) switch(n) { case 0: // divide by 1 - cgen(nl, res); - return; - case 1: - // divide by 2 regalloc(&n1, nl->type, res); cgen(nl, &n1); - if(issigned[nl->type->etype]) { - // develop -1 iff nl is negative - regalloc(&n2, nl->type, N); - gmove(&n1, &n2); - nodconst(&n3, nl->type, w-1); - gins(optoas(ORSH, nl->type), &n3, &n2); - gins(optoas(OSUB, nl->type), &n2, &n1); - regfree(&n2); - } - nodconst(&n2, nl->type, n); - gins(optoas(ORSH, nl->type), &n2, &n1); + if(s) // divisor is -1: negate + gins(optoas(OMINUS, nl->type), N, &n1); gmove(&n1, res); regfree(&n1); return; + case 1: + // divide by 2 + regalloc(&n1, nl->type, res); + cgen(nl, &n1); + if(!issigned[nl->type->etype]) // unsigned needs no sign adjustment; the shift follows the switch + break; + + // develop -1 iff nl is negative + regalloc(&n2, nl->type, N); + gmove(&n1, &n2); + nodconst(&n3, nl->type, w-1); + gins(optoas(ORSH, nl->type), &n3, &n2); + gins(optoas(OSUB,
nl->type), &n2, &n1); + regfree(&n2); + break; default: regalloc(&n1, nl->type, res); cgen(nl, &n1); - if(issigned[nl->type->etype]) { - // develop (2^k)-1 iff nl is negative - regalloc(&n2, nl->type, N); - gmove(&n1, &n2); - nodconst(&n3, nl->type, w-1); - gins(optoas(ORSH, nl->type), &n3, &n2); - nodconst(&n3, nl->type, w-n); - gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2); - gins(optoas(OADD, nl->type), &n2, &n1); - regfree(&n2); - } - nodconst(&n2, nl->type, n); - gins(optoas(ORSH, nl->type), &n2, &n1); - gmove(&n1, res); - regfree(&n1); + if(!issigned[nl->type->etype]) + break; + + // develop (2^k)-1 iff nl is negative + regalloc(&n2, nl->type, N); + gmove(&n1, &n2); + nodconst(&n3, nl->type, w-1); + gins(optoas(ORSH, nl->type), &n3, &n2); + nodconst(&n3, nl->type, w-n); + gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2); + gins(optoas(OADD, nl->type), &n2, &n1); + regfree(&n2); + break; } + nodconst(&n2, nl->type, n); // shared tail for case 1 and default: shift right by n + gins(optoas(ORSH, nl->type), &n2, &n1); + if(s) // negative power of two: negate the shifted value + gins(optoas(OMINUS, nl->type), N, &n1); + gmove(&n1, res); + regfree(&n1); return; divbymul: +goto longdiv; // disabled for now; the switch that follows is never executed. switch(simtype[nl->type->etype]) { default: goto longdiv; - case TINT32: + case TUINT16: case TUINT32: - case TINT64: case TUINT64: + m.w = w; + m.ud = mpgetfix(nr->val.u.xval); + umagic(&m); + if(m.bad) + break; + if(op == OMOD) { + // todo + break; + } + if(m.ua != 0) { + // todo fixup + break; + } break; - } - // todo - goto longdiv; -longdiv: - rax = reg[D_AX]; - rdx = reg[D_DX]; + case TINT16: + case TINT32: + case TINT64: + m.w = w; + m.sd = mpgetfix(nr->val.u.xval); + smagic(&m); + if(m.bad) + break; + if(op == OMOD) { + // todo + break; + } + if(m.sm < 0) { + // todo fixup + break; + } - nodreg(&ax, types[TINT64], D_AX); - nodreg(&dx, types[TINT64], D_DX); - regalloc(&ax, nl->type, &ax); - regalloc(&dx, nl->type, &dx); + savex(D_AX, &ax, &oldax, res, nl->type); // ax, dx and cx are all claimed here; cx is not otherwise referenced in this case + savex(D_DX, &dx, &olddx, res, nl->type); + savex(D_CX, &cx, &oldcx, res, nl->type); - // save current ax and dx
if they are live - // and not the destination - memset(&oldax, 0, sizeof oldax); - memset(&olddx, 0, sizeof olddx); - if(rax > 0 && !samereg(&ax, res)) { - regalloc(&oldax, nl->type, N); - gmove(&ax, &oldax); - } - if(rdx > 0 && !samereg(&dx, res)) { - regalloc(&olddx, nl->type, N); - gmove(&dx, &olddx); - } + regalloc(&n1, nl->type, N); + cgen(nl, &n1); // num -> reg(n1) - dodiv(op, nl, nr, res, &ax, &dx); + nodconst(&n2, nl->type, m.sm); + gmove(&n2, &ax); // const->ax - regfree(&ax); - regfree(&dx); + gins(optoas(OMUL, nl->type), &n1, N); // imul reg - if(oldax.op != 0) { - gmove(&oldax, &ax); - regfree(&oldax); - } - if(olddx.op != 0) { - gmove(&olddx, &dx); - regfree(&olddx); + nodconst(&n2, nl->type, m.s); + gins(optoas(ORSH, nl->type), &n2, &dx); // shift dx + + nodconst(&n2, nl->type, w-1); + gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg + gins(optoas(OSUB, nl->type), &n1, &dx); // added: subtracting n1 (-1 when num is negative) adds 1 to dx + + if(m.sd < 0) // negative divisor: negate the result + gins(optoas(OMINUS, nl->type), N, &dx); + + regfree(&n1); + gmove(&dx, res); + + restx(&ax, &oldax); + restx(&dx, &olddx); + restx(&cx, &oldcx); + return; } + goto longdiv; // every break in the switch above lands here +longdiv: + savex(D_AX, &ax, &oldax, res, nl->type); // general path: claim ax and dx, call dodiv, then restore them + savex(D_DX, &dx, &olddx, res, nl->type); + dodiv(op, nl, nr, res, &ax, &dx); + restx(&ax, &oldax); + restx(&dx, &olddx); } /* diff --git a/src/cmd/6g/gsubr.c b/src/cmd/6g/gsubr.c index 136a8d5efe..c9d7980ebc 100644 --- a/src/cmd/6g/gsubr.c +++ b/src/cmd/6g/gsubr.c @@ -1881,6 +1881,17 @@ powtwo(Node *n) b = b<<1; } + if(!issigned[n->type->etype]) // only signed types are tested for a negated power of two + goto no; + + v = -v; // now test whether the negated value is a power of two + b = 1ULL; + for(i=0; i<64; i++) { + if(b == v) + return i+1000; // 1000 flags the negated case; cgen_div subtracts it again + b = b<<1; + } + no: return -1; } @@ -1894,6 +1905,7 @@ tounsigned(Type *t) switch(t->etype) { default: print("tounsigned: unknown type %T\n", t); + t = T; // unknown type: return T rather than the argument. break; case TINT: t = types[TUINT]; @@ -1913,3 +1925,186 @@ tounsigned(Type *t) } return t; } + +void +smagic(Magic *m) // computes m->sm and m->s for signed division by m->sd in m->w bits; m->bad is set when no result is produced +{ + int p; + uint64 ad, anc, delta, q1, r1, q2, r2, t, two31; + uint64 mask; + + m->bad = 0; + switch(m->w) { +
default: + m->bad = 1; + return; + case 8: + mask = 0xffLL; + break; + case 16: + mask = 0xffffLL; + break; + case 32: + mask = 0xffffffffLL; + break; + case 64: + mask = 0xffffffffffffffffLL; + break; + } + two31 = mask ^ (mask>>1); + + p = m->w-1; + ad = m->sd; + if(m->sd < 0) + ad = -m->sd; + + // bad denominators: 0, 1 and 2^(w-1) in magnitude (tested before ad is masked below) + if(ad == 0 || ad == 1 || ad == two31) { + m->bad = 1; + return; + } + + t = two31; + ad &= mask; + + anc = t - 1 - t%ad; + anc &= mask; + + q1 = two31/anc; + r1 = two31 - q1*anc; + q1 &= mask; + r1 &= mask; + + q2 = two31/ad; + r2 = two31 - q2*ad; + q2 &= mask; + r2 &= mask; + + for(;;) { + p++; + q1 <<= 1; + r1 <<= 1; + q1 &= mask; + r1 &= mask; + if(r1 >= anc) { + q1++; + r1 -= anc; + q1 &= mask; + r1 &= mask; + } + + q2 <<= 1; + r2 <<= 1; + q2 &= mask; + r2 &= mask; + if(r2 >= ad) { + q2++; + r2 -= ad; + q2 &= mask; + r2 &= mask; + } + + delta = ad - r2; + delta &= mask; + if(q1 < delta || (q1 == delta && r1 == 0)) { + continue; + } + break; + } + + m->sm = q2+1; + m->s = p-m->w; // cgen_div shifts dx right by this amount +} + +void +umagic(Magic *m) // unsigned counterpart of smagic: computes m->um, m->s and m->ua for division by m->ud; m->bad is set when no result is produced +{ + int p; + uint64 nc, delta, q1, r1, q2, r2, two31; + uint64 mask; + + m->bad = 0; + m->ua = 0; + + switch(m->w) { + default: + m->bad = 1; + return; + case 8: + mask = 0xffLL; + break; + case 16: + mask = 0xffffLL; + break; + case 32: + mask = 0xffffffffLL; + break; + case 64: + mask = 0xffffffffffffffffLL; + break; + } + two31 = mask ^ (mask>>1); // only the top bit of the w-bit mask, i.e. 2^(w-1) + + m->ud &= mask; // truncates the stored divisor to w bits in place + if(m->ud == 0 || m->ud == two31) { + m->bad = 1; + return; + } + nc = mask - (-m->ud&mask)%m->ud; + p = m->w-1; + + q1 = two31/nc; + r1 = two31 - q1*nc; + q1 &= mask; + r1 &= mask; + + q2 = (two31-1) / m->ud; + r2 = (two31-1) - q2*m->ud; + q2 &= mask; + r2 &= mask; + + for(;;) { + p++; + if(r1 >= nc-r1) { + q1 <<= 1; + q1++; + r1 <<= 1; + r1 -= nc; + } else { + q1 <<= 1; + r1 <<= 1; + } + q1 &= mask; + r1 &= mask; + if(r2+1 >= m->ud-r2) { + if(q2 >= two31-1) { + m->ua = 1; + } + q2 <<= 1; + q2++; + r2 <<= 1; + r2++; + r2 -= m->ud; + } else { + if(q2 >= two31) { + m->ua = 1; +
} + q2 <<= 1; + r2 <<= 1; + r2++; + } + q2 &= mask; + r2 &= mask; + + delta = m->ud - 1 - r2; + delta &= mask; + + if(p < m->w+m->w) + if(q1 < delta || (q1 == delta && r1 == 0)) { + continue; + } + break; + } + m->um = q2+1; + m->s = p-m->w; +} -- 2.30.9