diff --git a/src/pkg/runtime/openbsd/386/sys.s b/src/pkg/runtime/openbsd/386/sys.s
index c399c91050f49a860870fcffa15bf900cbb11c21..d2df51827648d46ea67dec396724cc6e14e34a04 100644
--- a/src/pkg/runtime/openbsd/386/sys.s
+++ b/src/pkg/runtime/openbsd/386/sys.s
@@ -269,6 +269,16 @@ TEXT runtime·osyield(SB),7,$-4
 	INT	$0x80
 	RET
 
+TEXT runtime·thrsleep(SB),7,$-4
+	MOVL	$300, AX		// sys_thrsleep
+	INT	$0x80
+	RET
+
+TEXT runtime·thrwakeup(SB),7,$-4
+	MOVL	$301, AX		// sys_thrwakeup
+	INT	$0x80
+	RET
+
 TEXT runtime·sysctl(SB),7,$28
 	LEAL	arg0+0(FP), SI
 	LEAL	4(SP), DI
diff --git a/src/pkg/runtime/openbsd/amd64/defs.h b/src/pkg/runtime/openbsd/amd64/defs.h
index 4eb5cd20565788659eb385f8a41c806bbc19a17d..968f22d586066caa31754a15819f5d5c758cdb7e 100644
--- a/src/pkg/runtime/openbsd/amd64/defs.h
+++ b/src/pkg/runtime/openbsd/amd64/defs.h
@@ -114,6 +114,8 @@ struct Itimerval {
 
 typedef void sfxsave64;
 
+typedef void usavefpu;
+
 typedef struct Sigcontext Sigcontext;
 struct Sigcontext {
 	int64 sc_rdi;
diff --git a/src/pkg/runtime/openbsd/amd64/sys.s b/src/pkg/runtime/openbsd/amd64/sys.s
index b64868f31416267967d7bab19ad7692fcb401f70..29d74a1200d7236c9d18b1d8ce16b4f525aa1734 100644
--- a/src/pkg/runtime/openbsd/amd64/sys.s
+++ b/src/pkg/runtime/openbsd/amd64/sys.s
@@ -62,19 +62,19 @@ TEXT runtime·osyield(SB),7,$0
 	SYSCALL
 	RET
 
-TEXT runtime·sys_thrsleep(SB),7,$0
-	MOVQ 8(SP), DI
-	MOVL 16(SP), SI
-	MOVQ 24(SP), DX
-	MOVQ 32(SP), R10
-	MOVL $300, AX
+TEXT runtime·thrsleep(SB),7,$0
+	MOVQ	8(SP), DI		// arg 1 - ident
+	MOVL	16(SP), SI		// arg 2 - clock_id
+	MOVQ	24(SP), DX		// arg 3 - tp
+	MOVQ	32(SP), R10		// arg 4 - lock
+	MOVL	$300, AX		// sys_thrsleep
 	SYSCALL
 	RET
 
-TEXT runtime·sys_thrwakeup(SB),7,$0
-	MOVQ 8(SP), DI
-	MOVL 16(SP), SI
-	MOVL $301, AX
+TEXT runtime·thrwakeup(SB),7,$0
+	MOVQ	8(SP), DI		// arg 1 - ident
+	MOVL	16(SP), SI		// arg 2 - n
+	MOVL	$301, AX		// sys_thrwakeup
 	SYSCALL
 	RET
 
diff --git a/src/pkg/runtime/openbsd/thread.c b/src/pkg/runtime/openbsd/thread.c
index 2972a3cd41d571cb708bd23edf11081dd68c39cf..48e02b6a77d349e53e567f58beed9d3c3c6d1372 100644
--- a/src/pkg/runtime/openbsd/thread.c
+++ b/src/pkg/runtime/openbsd/thread.c
@@ -6,15 +6,26 @@
 #include "os.h"
 #include "stack.h"
 
-extern SigTab runtime·sigtab[];
-
-extern int64 runtime·rfork_thread(int32 flags, void *stack, M *m, G *g, void (*fn)(void));
-
 enum
 {
+	MUTEX_UNLOCKED = 0,
+	MUTEX_LOCKED = 1,
+	MUTEX_SLEEPING = 2,
+
+	ACTIVE_SPIN = 4,
+	ACTIVE_SPIN_CNT = 30,
+	PASSIVE_SPIN = 1,
+
+	ESRCH = 3,
 	ENOTSUP = 91,
 };
 
+extern SigTab runtime·sigtab[];
+
+extern int64 runtime·rfork_thread(int32 flags, void *stack, M *m, G *g, void (*fn)(void));
+extern int32 runtime·thrsleep(void *, void *, void*, void *);
+extern int32 runtime·thrwakeup(void *, int32);
+
 // From OpenBSD's <sys/sysctl.h>
 #define	CTL_HW	6
 #define	HW_NCPU	3
@@ -39,28 +50,86 @@ getncpu(void)
 		return 1;
 }
 
-// Basic spinlocks using CAS. We can improve on these later.
+// Possible lock states are MUTEX_UNLOCKED, MUTEX_LOCKED and MUTEX_SLEEPING.
+// MUTEX_SLEEPING means that there is potentially at least one sleeping thread.
+// Note that there can be spinning threads during all states - they do not
+// affect the mutex's state.
 static void
 lock(Lock *l)
 {
+	uint32 i, v, wait, spin;
+	int32 ret;
+
+	// Speculative grab for lock.
+	v = runtime·xchg(&l->key, MUTEX_LOCKED);
+	if(v == MUTEX_UNLOCKED)
+		return;
+
+	// If we ever change the lock from MUTEX_SLEEPING to some other value,
+	// we must be careful to change it back to MUTEX_SLEEPING before
+	// returning, to ensure that the sleeping thread gets its wakeup call.
+	wait = v;
+
+	// No point spinning unless there are multiple processors.
+	spin = 0;
+	if(runtime·ncpu > 1)
+		spin = ACTIVE_SPIN;
+
 	for(;;) {
-		if(runtime·cas(&l->key, 0, 1))
+		// Try for lock, spinning.
+		for(i = 0; i < spin; i++) {
+			while(l->key == MUTEX_UNLOCKED)
+				if(runtime·cas(&l->key, MUTEX_UNLOCKED, wait))
+					return;
+			runtime·procyield(ACTIVE_SPIN_CNT);
+		}
+
+		// Try for lock, rescheduling.
+		for(i = 0; i < PASSIVE_SPIN; i++) {
+			while(l->key == MUTEX_UNLOCKED)
+				if(runtime·cas(&l->key, MUTEX_UNLOCKED, wait))
+					return;
+			runtime·osyield();
+		}
+
+		// Grab a lock on sema and sleep - sema will be unlocked by
+		// thrsleep() and we'll get woken by another thread.
+		// Note that thrsleep unlocks on a _spinlock_lock_t which is
+		// an int on amd64, so we need to be careful here.
+		while (!runtime·cas(&l->sema, MUTEX_UNLOCKED, MUTEX_LOCKED))
+			runtime·osyield();
+		v = runtime·xchg(&l->key, MUTEX_SLEEPING);
+		if(v == MUTEX_UNLOCKED) {
+			l->sema = MUTEX_UNLOCKED;
 			return;
-		runtime·osyield();
+		}
+		wait = v;
+		ret = runtime·thrsleep(&l->key, 0, 0, &l->sema);
+		if (ret != 0) {
+			runtime·printf("thrsleep addr=%p sema=%d ret=%d\n",
+				&l->key, l->sema, ret);
+			l->sema = MUTEX_UNLOCKED;
+		}
 	}
 }
 
 static void
 unlock(Lock *l)
 {
-	uint32 v;
-
-	for (;;) {
-		v = l->key;
-		if((v&1) == 0)
-			runtime·throw("unlock of unlocked lock");
-		if(runtime·cas(&l->key, v, 0))
-			break;
+	uint32 v, ret;
+
+	while (!runtime·cas(&l->sema, MUTEX_UNLOCKED, MUTEX_LOCKED))
+		runtime·osyield();
+	v = runtime·xchg(&l->key, MUTEX_UNLOCKED);
+	l->sema = MUTEX_UNLOCKED;
+	if(v == MUTEX_UNLOCKED)
+		runtime·throw("unlock of unlocked lock");
+	if(v == MUTEX_SLEEPING) {
+		ret = runtime·thrwakeup(&l->key, 0);
+		if (ret != 0 && ret != ESRCH) {
+			runtime·printf("thrwakeup addr=%p sem=%d ret=%d\n",
+				&l->key, l->sema, ret);
+		}
 	}
 }