From 8b8a2bd94524b8b79af51c54ef2881e247149fc5 Mon Sep 17 00:00:00 2001
From: Ken Thompson <ken@golang.org>
Date: Fri, 7 Aug 2009 20:21:04 -0700
Subject: [PATCH] more work on divide by constant. no real change -- disabled
 because of bugs

R=rsc
OCL=32923
CL=32923
---
 src/cmd/6g/gg.h    |  19 ++++
 src/cmd/6g/ggen.c  | 219 ++++++++++++++++++++++++++++++---------------
 src/cmd/6g/gsubr.c | 195 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 360 insertions(+), 73 deletions(-)

diff --git a/src/cmd/6g/gg.h b/src/cmd/6g/gg.h
index f9f50cc9d4..6ba975bad2 100644
--- a/src/cmd/6g/gg.h
+++ b/src/cmd/6g/gg.h
@@ -42,6 +42,23 @@ struct	Prog
 	void*	reg;		// pointer to containing Reg struct
 };
 
+typedef	struct	Magic Magic;
+struct	Magic
+{
+	int	w;	// input for both - width
+	int	s;	// output for both - shift
+	int	bad;	// output for both - unexpected failure
+
+	// magic multiplier for signed literal divisors
+	int64	sd;	// input - literal divisor
+	int64	sm;	// output - multiplier
+
+	// magic multiplier for unsigned literal divisors
+	uint64	ud;	// input - literal divisor
+	uint64	um;	// output - multiplier
+	int	ua;	// output - adder
+};
+
 EXTERN	Biobuf*	bout;
 EXTERN	int32	dynloc;
 EXTERN	uchar	reg[D_NONE];
@@ -127,6 +144,8 @@ void	afunclit(Addr*);
 void	datagostring(Strlit*, Addr*);
 int	powtwo(Node*);
 Type*	tounsigned(Type*);
+void	smagic(Magic*);
+void	umagic(Magic*);
 
 /*
  * obj.c
diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c
index 4e71f75e5c..b0fa9c7255 100644
--- a/src/cmd/6g/ggen.c
+++ b/src/cmd/6g/ggen.c
@@ -525,6 +525,36 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
 		gmove(dx, res);
 }
 
+static void
+savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
+{
+	int r;
+
+	r = reg[dr];
+
+	nodreg(x, types[TINT64], dr);
+	regalloc(x, t, x);
+
+	// save current ax and dx if they are live
+	// and not the destination
+	memset(oldx, 0, sizeof *oldx);
+	if(r > 0 && !samereg(x, res)) {
+		regalloc(oldx, t, N);
+		gmove(x, oldx);
+	}
+}
+
+static void
+restx(Node *x, Node *oldx)
+{
+	regfree(x);
+
+	if(oldx->op != 0) {
+		gmove(oldx, x);
+		regfree(oldx);
+	}
+}
+
 /*
  * generate division according to op, one of:
  *	res = nl / nr
@@ -533,18 +563,20 @@ dodiv(int op, Node *nl, Node *nr, Node *res, Node *ax, Node *dx)
 void
 cgen_div(int op, Node *nl, Node *nr, Node *res)
 {
-	Node ax, dx, oldax, olddx, n1, n2, n3;
-	int rax, rdx, n, w;
+	Node ax, dx, cx, oldax, olddx, oldcx;
+	Node n1, n2, n3, savl, savr;
+	int n, w, s;
+	Magic m;
 
 	if(nl->ullman >= UINF) {
-		tempname(&n1, nl->type);
-		cgen(nl, &n1);
-		nl = &n1;
+		tempname(&savl, nl->type);
+		cgen(nl, &savl);
+		nl = &savl;
 	}
 	if(nr->ullman >= UINF) {
-		tempname(&n2, nr->type);
-		cgen(nr, &n2);
-		nr = &n2;
+		tempname(&savr, nr->type);
+		cgen(nr, &savr);
+		nr = &savr;
 	}
 
 	if(nr->op != OLITERAL)
@@ -552,8 +584,14 @@ cgen_div(int op, Node *nl, Node *nr, Node *res)
 
 	// special cases of mod/div
 	// by a constant
-	n = powtwo(nr);
 	w = nl->type->width*8;
+	s = 0;
+	n = powtwo(nr);
+	if(n >= 1000) {
+		// negative power of 2
+		s = 1;
+		n -= 1000;
+	}
 
 	if(n+1 >= w) {
 		// just sign bit
@@ -571,97 +609,132 @@ cgen_div(int op, Node *nl, Node *nr, Node *res)
 	switch(n) {
 	case 0:
 		// divide by 1
-		cgen(nl, res);
-		return;
-	case 1:
-		// divide by 2
 		regalloc(&n1, nl->type, res);
 		cgen(nl, &n1);
-		if(issigned[nl->type->etype]) {
-			// develop -1 iff nl is negative
-			regalloc(&n2, nl->type, N);
-			gmove(&n1, &n2);
-			nodconst(&n3, nl->type, w-1);
-			gins(optoas(ORSH, nl->type), &n3, &n2);
-			gins(optoas(OSUB, nl->type), &n2, &n1);
-			regfree(&n2);
-		}
-		nodconst(&n2, nl->type, n);
-		gins(optoas(ORSH, nl->type), &n2, &n1);
+		if(s)
+			gins(optoas(OMINUS, nl->type), N, &n1);
 		gmove(&n1, res);
 		regfree(&n1);
 		return;
+	case 1:
+		// divide by 2
+		regalloc(&n1, nl->type, res);
+		cgen(nl, &n1);
+		if(!issigned[nl->type->etype])
+			break;
+
+		// develop -1 iff nl is negative
+		regalloc(&n2, nl->type, N);
+		gmove(&n1, &n2);
+		nodconst(&n3, nl->type, w-1);
+		gins(optoas(ORSH, nl->type), &n3, &n2);
+		gins(optoas(OSUB, nl->type), &n2, &n1);
+		regfree(&n2);
+		break;
 	default:
 		regalloc(&n1, nl->type, res);
 		cgen(nl, &n1);
-		if(issigned[nl->type->etype]) {
-			// develop (2^k)-1 iff nl is negative
-			regalloc(&n2, nl->type, N);
-			gmove(&n1, &n2);
-			nodconst(&n3, nl->type, w-1);
-			gins(optoas(ORSH, nl->type), &n3, &n2);
-			nodconst(&n3, nl->type, w-n);
-			gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2);
-			gins(optoas(OADD, nl->type), &n2, &n1);
-			regfree(&n2);
-		}
-		nodconst(&n2, nl->type, n);
-		gins(optoas(ORSH, nl->type), &n2, &n1);
-		gmove(&n1, res);
-		regfree(&n1);
+		if(!issigned[nl->type->etype])
+			break;
+
+		// develop (2^k)-1 iff nl is negative
+		regalloc(&n2, nl->type, N);
+		gmove(&n1, &n2);
+		nodconst(&n3, nl->type, w-1);
+		gins(optoas(ORSH, nl->type), &n3, &n2);
+		nodconst(&n3, nl->type, w-n);
+		gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2);
+		gins(optoas(OADD, nl->type), &n2, &n1);
+		regfree(&n2);
+		break;
 	}
+	nodconst(&n2, nl->type, n);
+	gins(optoas(ORSH, nl->type), &n2, &n1);
+	if(s)
+		gins(optoas(OMINUS, nl->type), N, &n1);
+	gmove(&n1, res);
+	regfree(&n1);
 	return;
 
 divbymul:
+goto longdiv;
 	switch(simtype[nl->type->etype]) {
 	default:
 		goto longdiv;
 
-	case TINT32:
+	case TUINT16:
 	case TUINT32:
-	case TINT64:
 	case TUINT64:
+		m.w = w;
+		m.ud = mpgetfix(nr->val.u.xval);
+		umagic(&m);
+		if(m.bad)
+			break;
+		if(op == OMOD) {
+			// todo
+			break;
+		}
+		if(m.ua != 0) {
+			// todo fixup
+			break;
+		}
 		break;
-	}
-	// todo
-	goto longdiv;
 
-longdiv:
-	rax = reg[D_AX];
-	rdx = reg[D_DX];
+	case TINT16:
+	case TINT32:
+	case TINT64:
+		m.w = w;
+		m.sd = mpgetfix(nr->val.u.xval);
+		smagic(&m);
+		if(m.bad)
+			break;
+		if(op == OMOD) {
+			// todo
+			break;
+		}
+		if(m.sm < 0) {
+			// todo fixup
+			break;
+		}
 
-	nodreg(&ax, types[TINT64], D_AX);
-	nodreg(&dx, types[TINT64], D_DX);
-	regalloc(&ax, nl->type, &ax);
-	regalloc(&dx, nl->type, &dx);
+		savex(D_AX, &ax, &oldax, res, nl->type);
+		savex(D_DX, &dx, &olddx, res, nl->type);
+		savex(D_CX, &cx, &oldcx, res, nl->type);
 
-	// save current ax and dx if they are live
-	// and not the destination
-	memset(&oldax, 0, sizeof oldax);
-	memset(&olddx, 0, sizeof olddx);
-	if(rax > 0 && !samereg(&ax, res)) {
-		regalloc(&oldax, nl->type, N);
-		gmove(&ax, &oldax);
-	}
-	if(rdx > 0 && !samereg(&dx, res)) {
-		regalloc(&olddx, nl->type, N);
-		gmove(&dx, &olddx);
-	}
+		regalloc(&n1, nl->type, N);
+		cgen(nl, &n1);				// num -> reg(n1)
 
-	dodiv(op, nl, nr, res, &ax, &dx);
+		nodconst(&n2, nl->type, m.sm);
+		gmove(&n2, &ax);			// const->ax
 
-	regfree(&ax);
-	regfree(&dx);
+		gins(optoas(OMUL, nl->type), &n1, N);	// imul reg
 
-	if(oldax.op != 0) {
-		gmove(&oldax, &ax);
-		regfree(&oldax);
-	}
-	if(olddx.op != 0) {
-		gmove(&olddx, &dx);
-		regfree(&olddx);
+		nodconst(&n2, nl->type, m.s);
+		gins(optoas(ORSH, nl->type), &n2, &dx);	// shift dx
+
+		nodconst(&n2, nl->type, w-1);
+		gins(optoas(ORSH, nl->type), &n2, &n1);	// -1 iff num is neg
+		gins(optoas(OSUB, nl->type), &n1, &dx);	// added
+
+		if(m.sd < 0)
+			gins(optoas(OMINUS, nl->type), N, &dx);
+
+		regfree(&n1);
+		gmove(&dx, res);
+
+		restx(&ax, &oldax);
+		restx(&dx, &olddx);
+		restx(&cx, &oldcx);
+		return;
 	}
+	goto longdiv;
 
+longdiv:
+	savex(D_AX, &ax, &oldax, res, nl->type);
+	savex(D_DX, &dx, &olddx, res, nl->type);
+	dodiv(op, nl, nr, res, &ax, &dx);
+	restx(&ax, &oldax);
+	restx(&dx, &olddx);
 }
 
 /*
diff --git a/src/cmd/6g/gsubr.c b/src/cmd/6g/gsubr.c
index 136a8d5efe..c9d7980ebc 100644
--- a/src/cmd/6g/gsubr.c
+++ b/src/cmd/6g/gsubr.c
@@ -1881,6 +1881,17 @@ powtwo(Node *n)
 		b = b<<1;
 	}
 
+	if(!issigned[n->type->etype])
+		goto no;
+
+	v = -v;
+	b = 1ULL;
+	for(i=0; i<64; i++) {
+		if(b == v)
+			return i+1000;
+		b = b<<1;
+	}
+
 no:
 	return -1;
 }
@@ -1894,6 +1905,7 @@ tounsigned(Type *t)
 	switch(t->etype) {
 	default:
 		print("tounsigned: unknown type %T\n", t);
+		t = T;
 		break;
 	case TINT:
 		t = types[TUINT];
@@ -1913,3 +1925,186 @@ tounsigned(Type *t)
 	}
 	return t;
 }
+
+void
+smagic(Magic *m)
+{
+	int p;
+	uint64 ad, anc, delta, q1, r1, q2, r2, t, two31;
+	uint64 mask;
+
+	m->bad = 0;
+	switch(m->w) {
+	default:
+		m->bad = 1;
+		return;
+	case 8:
+		mask = 0xffLL;
+		break;
+	case 16:
+		mask = 0xffffLL;
+		break;
+	case 32:
+		mask = 0xffffffffLL;
+		break;
+	case 64:
+		mask = 0xffffffffffffffffLL;
+		break;
+	}
+	two31 = mask ^ (mask>>1);
+
+	p = m->w-1;
+	ad = m->sd;
+	if(m->sd < 0)
+		ad = -m->sd;
+
+	// bad denominators
+	if(ad == 0 || ad == 1 || ad == two31) {
+		m->bad = 1;
+		return;
+	}
+
+	t = two31;
+	ad &= mask;
+
+	anc = t - 1 - t%ad;
+	anc &= mask;
+
+	q1 = two31/anc;
+	r1 = two31 - q1*anc;
+	q1 &= mask;
+	r1 &= mask;
+
+	q2 = two31/ad;
+	r2 = two31 - q2*ad;
+	q2 &= mask;
+	r2 &= mask;
+
+	for(;;) {
+		p++;
+		q1 <<= 1;
+		r1 <<= 1;
+		q1 &= mask;
+		r1 &= mask;
+		if(r1 >= anc) {
+			q1++;
+			r1 -= anc;
+			q1 &= mask;
+			r1 &= mask;
+		}
+
+		q2 <<= 1;
+		r2 <<= 1;
+		q2 &= mask;
+		r2 &= mask;
+		if(r2 >= ad) {
+			q2++;
+			r2 -= ad;
+			q2 &= mask;
+			r2 &= mask;
+		}
+
+		delta = ad - r2;
+		delta &= mask;
+		if(q1 < delta || (q1 == delta && r1 == 0)) {
+			continue;
+		}
+		break;
+	}
+
+	m->sm = q2+1;
+	m->s = p-m->w;
+}
+
+void
+umagic(Magic *m)
+{
+	int p;
+	uint64 nc, delta, q1, r1, q2, r2, two31;
+	uint64 mask;
+
+	m->bad = 0;
+	m->ua = 0;
+
+	switch(m->w) {
+	default:
+		m->bad = 1;
+		return;
+	case 8:
+		mask = 0xffLL;
+		break;
+	case 16:
+		mask = 0xffffLL;
+		break;
+	case 32:
+		mask = 0xffffffffLL;
+		break;
+	case 64:
+		mask = 0xffffffffffffffffLL;
+		break;
+	}
+	two31 = mask ^ (mask>>1);
+
+	m->ud &= mask;
+	if(m->ud == 0 || m->ud == two31) {
+		m->bad = 1;
+		return;
+	}
+	nc = mask - (-m->ud&mask)%m->ud;
+	p = m->w-1;
+
+	q1 = two31/nc;
+	r1 = two31 - q1*nc;
+	q1 &= mask;
+	r1 &= mask;
+
+	q2 = (two31-1) / m->ud;
+	r2 = (two31-1) - q2*m->ud;
+	q2 &= mask;
+	r2 &= mask;
+
+	for(;;) {
+		p++;
+		if(r1 >= nc-r1) {
+			q1 <<= 1;
+			q1++;
+			r1 <<= 1;
+			r1 -= nc;
+		} else {
+			q1 <<= 1;
+			r1 <<= 1;
+		}
+		q1 &= mask;
+		r1 &= mask;
+		if(r2+1 >= m->ud-r2) {
+			if(q2 >= two31-1) {
+				m->ua = 1;
+			}
+			q2 <<= 1;
+			q2++;
+			r2 <<= 1;
+			r2++;
+			r2 -= m->ud;
+		} else {
+			if(q2 >= two31) {
+				m->ua = 1;
+			}
+			q2 <<= 1;
+			r2 <<= 1;
+			r2++;
+		}
+		q2 &= mask;
+		r2 &= mask;
+
+		delta = m->ud - 1 - r2;
+		delta &= mask;
+
+		if(p < m->w+m->w)
+		if(q1 < delta || (q1 == delta && r1 == 0)) {
+			continue;
+		}
+		break;
+	}
+	m->um = q2+1;
+	m->s = p-m->w;
+}
-- 
2.30.9