go1.2rc2

309e1655 · Andrew Gerrand · 04e95a1a · 309e1655 · 04e95a1a · 04e95a1a
Commit 309e1655 authored Oct 18, 2013 by Andrew Gerrand
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 964 deletions

VERSION VERSION +1 -0

src/cmd/prof/Makefile src/cmd/prof/Makefile +0 -5

src/cmd/prof/doc.go src/cmd/prof/doc.go +0 -49

src/cmd/prof/main.c src/cmd/prof/main.c +0 -910

No files found.
--- a/VERSION
+++ b/VERSION
+go1.2rc2
\ No newline at end of file
--- a/src/cmd/prof/Makefile
+++ b/src/cmd/prof/Makefile
-# Copyright 2012 The Go Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-include ../../Make.dist
--- a/src/cmd/prof/doc.go
+++ b/src/cmd/prof/doc.go
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-/*
-
-Prof is a rudimentary real-time profiler.
-
-Given a command to run or the process id (pid) of a command already
-running, it samples the program's state at regular intervals and reports
-on its behavior.  With no options, it prints a histogram of the locations
-in the code that were sampled during execution.
-
-Since it is a real-time profiler, unlike a traditional profiler it samples
-the program's state even when it is not running, such as when it is
-asleep or waiting for I/O.  Each thread contributes equally to the
-statistics.
-
-Usage:
-	go tool prof -p pid [-t total_secs] [-d delta_msec] [6.out args ...]
-
-The output modes (default -h) are:
-
-	-P file.prof:
-		Write the profile information to file.prof, in the format used by pprof.
-		At the moment, this only works on Linux amd64 binaries and requires that the
-		binary be written using 6l -e to produce ELF debug info.
-		See http://code.google.com/p/google-perftools for details.
-	-h: histograms
-		How many times a sample occurred at each location.
-	-f: dynamic functions
-		At each sample period, print the name of the executing function.
-	-l: dynamic file and line numbers
-		At each sample period, print the file and line number of the executing instruction.
-	-r: dynamic registers
-		At each sample period, print the register contents.
-	-s: dynamic function stack traces
-		At each sample period, print the symbolic stack trace.
-
-Flag -t sets the maximum real time to sample, in seconds, and -d
-sets the sampling interval in milliseconds.  The default is to sample
-every 100ms until the program completes.
-
-It is installed as go tool prof and is architecture-independent.
-
-*/
-package main
--- a/src/cmd/prof/main.c
+++ b/src/cmd/prof/main.c
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !plan9
-
-#include <u.h>
-#include <time.h>
-#include <libc.h>
-#include <bio.h>
-#include <ctype.h>
-
-#define Ureg Ureg_amd64
-	#include <ureg_amd64.h>
-#undef Ureg
-#define Ureg Ureg_x86
-	#include <ureg_x86.h>
-#undef Ureg
-#include <mach.h>
-
-char* file = "6.out";
-static Fhdr fhdr;
-int have_syms;
-int fd;
-struct Ureg_amd64 ureg_amd64;
-struct Ureg_x86 ureg_x86;
-int total_sec = 0;
-int delta_msec = 100;
-int nsample;
-int nsamplethread;
-
-// pprof data, stored as sequences of N followed by N PC values.
-// See http://code.google.com/p/google-perftools .
-uvlong	*ppdata;	// traces
-Biobuf*	pproffd;	// file descriptor to write trace info
-long	ppstart;	// start position of current trace
-long	nppdata;	// length of data
-long	ppalloc;	// size of allocated data
-char	ppmapdata[10*1024];	// the map information for the output file
-
-// output formats
-int pprof;	// print pprof output to named file
-int functions;	// print functions
-int histograms;	// print histograms
-int linenums;	// print file and line numbers rather than function names
-int registers;	// print registers
-int stacks;		// print stack traces
-
-int pid;		// main process pid
-
-int nthread;	// number of threads
-int thread[32];	// thread pids
-Map *map[32];	// thread maps
-
-void
-Usage(void)
-{
-	fprint(2, "Usage: prof -p pid [-t total_secs] [-d delta_msec]\n");
-	fprint(2, "       prof [-t total_secs] [-d delta_msec] 6.out args ...\n");
-	fprint(2, "\tformats (default -h):\n");
-	fprint(2, "\t\t-P file.prof: write [c]pprof output to file.prof\n");
-	fprint(2, "\t\t-h: histograms\n");
-	fprint(2, "\t\t-f: dynamic functions\n");
-	fprint(2, "\t\t-l: dynamic file and line numbers\n");
-	fprint(2, "\t\t-r: dynamic registers\n");
-	fprint(2, "\t\t-s: dynamic function stack traces\n");
-	fprint(2, "\t\t-hs: include stack info in histograms\n");
-	exit(2);
-}
-
-typedef struct PC PC;
-struct PC {
-	uvlong pc;
-	uvlong callerpc;
-	unsigned int count;
-	PC* next;
-};
-
-enum {
-	Ncounters = 256
-};
-
-PC *counters[Ncounters];
-
-// Set up by setarch() to make most of the code architecture-independent.
-typedef struct Arch Arch;
-struct Arch {
-	char*	name;
-	void	(*regprint)(void);
-	int	(*getregs)(Map*);
-	int	(*getPC)(Map*);
-	int	(*getSP)(Map*);
-	uvlong	(*uregPC)(void);
-	uvlong	(*uregSP)(void);
-	void	(*ppword)(uvlong w);
-};
-
-void
-amd64_regprint(void)
-{
-	fprint(2, "ax\t0x%llux\n", ureg_amd64.ax);
-	fprint(2, "bx\t0x%llux\n", ureg_amd64.bx);
-	fprint(2, "cx\t0x%llux\n", ureg_amd64.cx);
-	fprint(2, "dx\t0x%llux\n", ureg_amd64.dx);
-	fprint(2, "si\t0x%llux\n", ureg_amd64.si);
-	fprint(2, "di\t0x%llux\n", ureg_amd64.di);
-	fprint(2, "bp\t0x%llux\n", ureg_amd64.bp);
-	fprint(2, "r8\t0x%llux\n", ureg_amd64.r8);
-	fprint(2, "r9\t0x%llux\n", ureg_amd64.r9);
-	fprint(2, "r10\t0x%llux\n", ureg_amd64.r10);
-	fprint(2, "r11\t0x%llux\n", ureg_amd64.r11);
-	fprint(2, "r12\t0x%llux\n", ureg_amd64.r12);
-	fprint(2, "r13\t0x%llux\n", ureg_amd64.r13);
-	fprint(2, "r14\t0x%llux\n", ureg_amd64.r14);
-	fprint(2, "r15\t0x%llux\n", ureg_amd64.r15);
-	fprint(2, "ds\t0x%llux\n", ureg_amd64.ds);
-	fprint(2, "es\t0x%llux\n", ureg_amd64.es);
-	fprint(2, "fs\t0x%llux\n", ureg_amd64.fs);
-	fprint(2, "gs\t0x%llux\n", ureg_amd64.gs);
-	fprint(2, "type\t0x%llux\n", ureg_amd64.type);
-	fprint(2, "error\t0x%llux\n", ureg_amd64.error);
-	fprint(2, "pc\t0x%llux\n", ureg_amd64.ip);
-	fprint(2, "cs\t0x%llux\n", ureg_amd64.cs);
-	fprint(2, "flags\t0x%llux\n", ureg_amd64.flags);
-	fprint(2, "sp\t0x%llux\n", ureg_amd64.sp);
-	fprint(2, "ss\t0x%llux\n", ureg_amd64.ss);
-}
-
-int
-amd64_getregs(Map *map)
-{
-	int i;
-	union {
-		uvlong regs[1];
-		struct Ureg_amd64 ureg;
-	} u;
-
-	for(i = 0; i < sizeof ureg_amd64; i+=8) {
-		if(get8(map, (uvlong)i, &u.regs[i/8]) < 0)
-			return -1;
-	}
-	ureg_amd64 = u.ureg;
-	return 0;
-}
-
-int
-amd64_getPC(Map *map)
-{
-	uvlong x;
-	int r;
-
-	r = get8(map, offsetof(struct Ureg_amd64, ip), &x);
-	ureg_amd64.ip = x;
-	return r;
-}
-
-int
-amd64_getSP(Map *map)
-{
-	uvlong x;
-	int r;
-
-	r = get8(map, offsetof(struct Ureg_amd64, sp), &x);
-	ureg_amd64.sp = x;
-	return r;
-}
-
-uvlong
-amd64_uregPC(void)
-{
-	return ureg_amd64.ip;
-}
-
-uvlong
-amd64_uregSP(void)
-{
-	return ureg_amd64.sp;
-}
-
-void
-amd64_ppword(uvlong w)
-{
-	uchar buf[8];
-
-	buf[0] = w;
-	buf[1] = w >> 8;
-	buf[2] = w >> 16;
-	buf[3] = w >> 24;
-	buf[4] = w >> 32;
-	buf[5] = w >> 40;
-	buf[6] = w >> 48;
-	buf[7] = w >> 56;
-	Bwrite(pproffd, buf, 8);
-}
-
-void
-x86_regprint(void)
-{
-	fprint(2, "ax\t0x%ux\n", ureg_x86.ax);
-	fprint(2, "bx\t0x%ux\n", ureg_x86.bx);
-	fprint(2, "cx\t0x%ux\n", ureg_x86.cx);
-	fprint(2, "dx\t0x%ux\n", ureg_x86.dx);
-	fprint(2, "si\t0x%ux\n", ureg_x86.si);
-	fprint(2, "di\t0x%ux\n", ureg_x86.di);
-	fprint(2, "bp\t0x%ux\n", ureg_x86.bp);
-	fprint(2, "ds\t0x%ux\n", ureg_x86.ds);
-	fprint(2, "es\t0x%ux\n", ureg_x86.es);
-	fprint(2, "fs\t0x%ux\n", ureg_x86.fs);
-	fprint(2, "gs\t0x%ux\n", ureg_x86.gs);
-	fprint(2, "cs\t0x%ux\n", ureg_x86.cs);
-	fprint(2, "flags\t0x%ux\n", ureg_x86.flags);
-	fprint(2, "pc\t0x%ux\n", ureg_x86.pc);
-	fprint(2, "sp\t0x%ux\n", ureg_x86.sp);
-	fprint(2, "ss\t0x%ux\n", ureg_x86.ss);
-}
-
-int
-x86_getregs(Map *map)
-{
-	int i;
-
-	for(i = 0; i < sizeof ureg_x86; i+=4) {
-		if(get4(map, (uvlong)i, &((uint32*)&ureg_x86)[i/4]) < 0)
-			return -1;
-	}
-	return 0;
-}
-
-int
-x86_getPC(Map* map)
-{
-	return get4(map, offsetof(struct Ureg_x86, pc), &ureg_x86.pc);
-}
-
-int
-x86_getSP(Map* map)
-{
-	return get4(map, offsetof(struct Ureg_x86, sp), &ureg_x86.sp);
-}
-
-uvlong
-x86_uregPC(void)
-{
-	return (uvlong)ureg_x86.pc;
-}
-
-uvlong
-x86_uregSP(void)
-{
-	return (uvlong)ureg_x86.sp;
-}
-
-void
-x86_ppword(uvlong w)
-{
-	uchar buf[4];
-
-	buf[0] = w;
-	buf[1] = w >> 8;
-	buf[2] = w >> 16;
-	buf[3] = w >> 24;
-	Bwrite(pproffd, buf, 4);
-}
-
-Arch archtab[] = {
-	{
-		"amd64",
-		amd64_regprint,
-		amd64_getregs,
-		amd64_getPC,
-		amd64_getSP,
-		amd64_uregPC,
-		amd64_uregSP,
-		amd64_ppword,
-	},
-	{
-		"386",
-		x86_regprint,
-		x86_getregs,
-		x86_getPC,
-		x86_getSP,
-		x86_uregPC,
-		x86_uregSP,
-		x86_ppword,
-	},
-	{
-		nil
-	}
-};
-
-Arch *arch;
-
-int
-setarch(void)
-{
-	int i;
-
-	if(mach != nil) {
-		for(i = 0; archtab[i].name != nil; i++) {
-			if (strcmp(mach->name, archtab[i].name) == 0) {
-				arch = &archtab[i];
-				return 0;
-			}
-		}
-	}
-	return -1;
-}
-
-int
-getthreads(void)
-{
-	int i, j, curn, found;
-	Map *curmap[nelem(map)];
-	int curthread[nelem(map)];
-	static int complained = 0;
-
-	curn = procthreadpids(pid, curthread, nelem(curthread));
-	if(curn <= 0)
-		return curn;
-
-	if(curn > nelem(map)) {
-		if(complained == 0) {
-			fprint(2, "prof: too many threads; limiting to %d\n", nthread, nelem(map));
-			complained = 1;
-		}
-		curn = nelem(map);
-	}
-	if(curn == nthread && memcmp(thread, curthread, curn*sizeof(*thread)) == 0)
-		return curn;	// no changes
-
-	// Number of threads has changed (might be the init case).
-	// A bit expensive but rare enough not to bother being clever.
-	for(i = 0; i < curn; i++) {
-		found = 0;
-		for(j = 0; j < nthread; j++) {
-			if(curthread[i] == thread[j]) {
-				found = 1;
-				curmap[i] = map[j];
-				map[j] = nil;
-				break;
-			}
-		}
-		if(found)
-			continue;
-
-		// map new thread
-		curmap[i] = attachproc(curthread[i], &fhdr);
-		if(curmap[i] == nil) {
-			fprint(2, "prof: can't attach to %d: %r\n", curthread[i]);
-			return -1;
-		}
-	}
-
-	for(j = 0; j < nthread; j++)
-		if(map[j] != nil)
-			detachproc(map[j]);
-
-	nthread = curn;
-	memmove(thread, curthread, nthread*sizeof thread[0]);
-	memmove(map, curmap, sizeof map);
-	return nthread;
-}
-
-int
-sample(Map *map)
-{
-	static int n;
-
-	n++;
-	if(registers) {
-		if(arch->getregs(map) < 0)
-			goto bad;
-	} else {
-		// we need only two registers
-		if(arch->getPC(map) < 0)
-			goto bad;
-		if(arch->getSP(map) < 0)
-			goto bad;
-	}
-	return 1;
-bad:
-	if(n == 1)
-		fprint(2, "prof: can't read registers: %r\n");
-	return 0;
-}
-
-void
-addtohistogram(uvlong pc, uvlong callerpc, uvlong sp)
-{
-	int h;
-	PC *x;
-	
-	USED(sp);
-
-	h = (pc + callerpc*101) % Ncounters;
-	for(x = counters[h]; x != NULL; x = x->next) {
-		if(x->pc == pc && x->callerpc == callerpc) {
-			x->count++;
-			return;
-		}
-	}
-	x = malloc(sizeof(PC));
-	if(x == nil)
-		sysfatal("out of memory");
-	x->pc = pc;
-	x->callerpc = callerpc;
-	x->count = 1;
-	x->next = counters[h];
-	counters[h] = x;
-}
-
-void
-addppword(uvlong pc)
-{
-	if(pc == 0) {
-		return;
-	}
-	if(nppdata == ppalloc) {
-		ppalloc = (1000+nppdata)*2;
-		ppdata = realloc(ppdata, ppalloc * sizeof ppdata[0]);
-		if(ppdata == nil) {
-			fprint(2, "prof: realloc failed: %r\n");
-			exit(2);
-		}
-	}
-	ppdata[nppdata++] = pc;
-}
-
-void
-startpptrace(void)
-{
-	ppstart = nppdata;
-	addppword(~0);
-}
-
-void
-endpptrace(void)
-{
-	ppdata[ppstart] = nppdata-ppstart-1;
-}
-
-uvlong nextpc;
-
-void
-xptrace(Map *map, uvlong pc, uvlong sp, Symbol *sym)
-{
-	USED(map);
-
-	char buf[1024];
-	if(sym == nil){
-		fprint(2, "syms\n");
-		return;
-	}
-	if(histograms)
-		addtohistogram(nextpc, pc, sp);
-	if(!histograms || stacks > 1 || pprof) {
-		if(nextpc == 0)
-			nextpc = sym->value;
-		if(stacks){
-			fprint(2, "%s(", sym->name);
-			fprint(2, ")");
-			if(nextpc != sym->value)
-				fprint(2, "+%#llux ", nextpc - sym->value);
-			if(have_syms && linenums && fileline(buf, sizeof buf, pc)) {
-				fprint(2, " %s", buf);
-			}
-			fprint(2, "\n");
-		}
-		if (pprof) {
-			addppword(nextpc);
-		}
-	}
-	nextpc = pc;
-}
-
-void
-stacktracepcsp(Map *map, uvlong pc, uvlong sp)
-{
-	nextpc = pc;
-	if(pprof){
-		startpptrace();
-	}
-	if(machdata->ctrace==nil)
-		fprint(2, "no machdata->ctrace\n");
-	else if(machdata->ctrace(map, pc, sp, 0, xptrace) <= 0)
-		fprint(2, "no stack frame: pc=%#p sp=%#p\n", pc, sp);
-	else {
-		addtohistogram(nextpc, 0, sp);
-		if(stacks)
-			fprint(2, "\n");
-	}
-	if(pprof){
-		endpptrace();
-	}
-}
-
-void
-printpc(Map *map, uvlong pc, uvlong sp)
-{
-	char buf[1024];
-	if(registers)
-		arch->regprint();
-	if(have_syms > 0 && linenums &&  fileline(buf, sizeof buf, pc))
-		fprint(2, "%s\n", buf);
-	if(have_syms > 0 && functions) {
-		symoff(buf, sizeof(buf), pc, CANY);
-		fprint(2, "%s\n", buf);
-	}
-	if(stacks || pprof){
-		stacktracepcsp(map, pc, sp);
-	}
-	else if(histograms){
-		addtohistogram(pc, 0, sp);
-	}
-}
-
-void
-ppmaps(void)
-{
-	int fd, n;
-	char tmp[100];
-	Seg *seg;
-
-	// If it's Linux, the info is in /proc/$pid/maps
-	snprint(tmp, sizeof tmp, "/proc/%d/maps", pid);
-	fd = open(tmp, 0);
-	if(fd >= 0) {
-		n = read(fd, ppmapdata, sizeof ppmapdata - 1);
-		close(fd);
-		if(n < 0) {
-			fprint(2, "prof: can't read %s: %r\n", tmp);
-			exit(2);
-		}
-		ppmapdata[n] = 0;
-		return;
-	}
-
-	// It's probably a mac. Synthesize an entry for the text file.
-	// The register segment may come first but it has a zero offset, so grab the first non-zero offset segment.
-	for(n = 0; n < 3; n++){
-		seg = &map[0]->seg[n];
-		if(seg->b == 0) {
-			continue;
-		}
-		snprint(ppmapdata, sizeof ppmapdata,
-			"%.16x-%.16x r-xp %d 00:00 34968549                           %s\n",
-			seg->b, seg->e, seg->f, "/home/r/6.out"
-		);
-		return;
-	}
-	fprint(2, "prof: no text segment in maps for %s\n", file);
-	exit(2);
-}
-
-void
-samples(void)
-{
-	int i, pid, msec;
-	struct timespec req;
-	int getmaps;
-
-	req.tv_sec = delta_msec/1000;
-	req.tv_nsec = 1000000*(delta_msec % 1000);
-	getmaps = 0;
-	if(pprof)
-		getmaps= 1;
-	for(msec = 0; total_sec <= 0 || msec < 1000*total_sec; msec += delta_msec) {
-		nsample++;
-		nsamplethread += nthread;
-		for(i = 0; i < nthread; i++) {
-			pid = thread[i];
-			if(ctlproc(pid, "stop") < 0)
-				return;
-			if(!sample(map[i])) {
-				ctlproc(pid, "start");
-				return;
-			}
-			printpc(map[i], arch->uregPC(), arch->uregSP());
-			ctlproc(pid, "start");
-		}
-		nanosleep(&req, NULL);
-		getthreads();
-		if(nthread == 0)
-			break;
-		if(getmaps) {
-			getmaps = 0;
-			ppmaps();
-		}
-	}
-}
-
-typedef struct Func Func;
-struct Func
-{
-	Func *next;
-	Symbol s;
-	uint onstack;
-	uint leaf;
-};
-
-Func *func[257];
-int nfunc;
-
-Func*
-findfunc(uvlong pc)
-{
-	Func *f;
-	uint h;
-	Symbol s;
-
-	if(pc == 0)
-		return nil;
-
-	if(!findsym(pc, CTEXT, &s))
-		return nil;
-
-	h = s.value % nelem(func);
-	for(f = func[h]; f != NULL; f = f->next)
-		if(f->s.value == s.value)
-			return f;
-
-	f = malloc(sizeof *f);
-	if(f == nil)
-		sysfatal("out of memory");
-	memset(f, 0, sizeof *f);
-	f->s = s;
-	f->next = func[h];
-	func[h] = f;
-	nfunc++;
-	return f;
-}
-
-int
-compareleaf(const void *va, const void *vb)
-{
-	Func *a, *b;
-
-	a = *(Func**)va;
-	b = *(Func**)vb;
-	if(a->leaf != b->leaf)
-		return b->leaf - a->leaf;
-	if(a->onstack != b->onstack)
-		return b->onstack - a->onstack;
-	return strcmp(a->s.name, b->s.name);
-}
-
-void
-dumphistogram(void)
-{
-	int i, h, n;
-	PC *x;
-	Func *f, **ff;
-
-	if(!histograms)
-		return;
-
-	// assign counts to functions.
-	for(h = 0; h < Ncounters; h++) {
-		for(x = counters[h]; x != NULL; x = x->next) {
-			f = findfunc(x->pc);
-			if(f) {
-				f->onstack += x->count;
-				f->leaf += x->count;
-			}
-			f = findfunc(x->callerpc);
-			if(f)
-				f->leaf -= x->count;
-		}
-	}
-
-	// build array
-	ff = malloc(nfunc*sizeof ff[0]);
-	if(ff == nil)
-		sysfatal("out of memory");
-	n = 0;
-	for(h = 0; h < nelem(func); h++)
-		for(f = func[h]; f != NULL; f = f->next)
-			ff[n++] = f;
-
-	// sort by leaf counts
-	qsort(ff, nfunc, sizeof ff[0], compareleaf);
-
-	// print.
-	fprint(2, "%d samples (avg %.1g threads)\n", nsample, (double)nsamplethread/nsample);
-	for(i = 0; i < nfunc; i++) {
-		f = ff[i];
-		fprint(2, "%6.2f%%\t", 100.0*(double)f->leaf/nsample);
-		if(stacks)
-			fprint(2, "%6.2f%%\t", 100.0*(double)f->onstack/nsample);
-		fprint(2, "%s\n", f->s.name);
-	}
-}
-
-typedef struct Trace Trace;
-struct Trace {
-	int	count;
-	int	npc;
-	uvlong	*pc;
-	Trace	*next;
-};
-
-void
-dumppprof(void)
-{
-	uvlong i, n, *p, *e;
-	int ntrace;
-	Trace *trace, *tp, *up, *prev;
-
-	if(!pprof)
-		return;
-	e = ppdata + nppdata;
-	// Create list of traces.  First, count the traces
-	ntrace = 0;
-	for(p = ppdata; p < e;) {
-		n = *p++;
-		p += n;
-		if(n == 0)
-			continue;
-		ntrace++;
-	}
-	if(ntrace <= 0)
-		return;
-	// Allocate and link the traces together.
-	trace = malloc(ntrace * sizeof(Trace));
-	if(trace == nil)
-		sysfatal("out of memory");
-	tp = trace;
-	for(p = ppdata; p < e;) {
-		n = *p++;
-		if(n == 0)
-			continue;
-		tp->count = 1;
-		tp->npc = n;
-		tp->pc = p;
-		tp->next = tp+1;
-		tp++;
-		p += n;
-	}
-	trace[ntrace-1].next = nil;
-	// Eliminate duplicates.  Lousy algorithm, although not as bad as it looks because
-	// the list collapses fast.
-	for(tp = trace; tp != nil; tp = tp->next) {
-		prev = tp;
-		for(up = tp->next; up != nil; up = up->next) {
-			if(up->npc == tp->npc && memcmp(up->pc, tp->pc, up->npc*sizeof up->pc[0]) == 0) {
-				tp->count++;
-				prev->next = up->next;
-			} else {
-				prev = up;
-			}
-		}
-	}
-	// Write file.
-	// See http://code.google.com/p/google-perftools/source/browse/trunk/doc/cpuprofile-fileformat.html
-	// 1) Header
-	arch->ppword(0);	// must be zero
-	arch->ppword(3);	// 3 words follow in header
-	arch->ppword(0);	// must be zero
-	arch->ppword(delta_msec * 1000);	// sampling period in microseconds
-	arch->ppword(0);	// must be zero (padding)
-	// 2) One record for each trace.
-	for(tp = trace; tp != nil; tp = tp->next) {
-		arch->ppword(tp->count);
-		arch->ppword(tp->npc);
-		for(i = 0; i < tp->npc; i++) {
-			arch->ppword(tp->pc[i]);
-		}
-	}
-	// 3) Binary trailer
-	arch->ppword(0);	// must be zero
-	arch->ppword(1);	// must be one
-	arch->ppword(0);	// must be zero
-	// 4) Mapped objects.
-	Bwrite(pproffd, ppmapdata, strlen(ppmapdata));
-	// 5) That's it.
-	Bterm(pproffd);
-}
-
-int
-startprocess(char **argv)
-{
-	int pid;
-
-	if((pid = fork()) == 0) {
-		pid = getpid();
-		if(ctlproc(pid, "hang") < 0){
-			fprint(2, "prof: child process could not hang\n");
-			exits(0);
-		}
-		execv(argv[0], argv);
-		fprint(2, "prof: could not exec %s: %r\n", argv[0]);
-		exits(0);
-	}
-
-	if(pid == -1) {
-		fprint(2, "prof: could not fork\n");
-		exit(1);
-	}
-	if(ctlproc(pid, "attached") < 0 || ctlproc(pid, "waitstop") < 0) {
-		fprint(2, "prof: could not attach to child process: %r\n");
-		exit(1);
-	}
-	return pid;
-}
-
-void
-detach(void)
-{
-	int i;
-
-	for(i = 0; i < nthread; i++)
-		detachproc(map[i]);
-}
-
-int
-main(int argc, char *argv[])
-{
-	int i;
-	char *ppfile;
-
-	ARGBEGIN{
-	case 'P':
-		pprof =1;
-		ppfile = EARGF(Usage());
-		pproffd = Bopen(ppfile, OWRITE);
-		if(pproffd == nil) {
-			fprint(2, "prof: cannot open %s: %r\n", ppfile);
-			exit(2);
-		}
-		break;
-	case 'd':
-		delta_msec = atoi(EARGF(Usage()));
-		break;
-	case 't':
-		total_sec = atoi(EARGF(Usage()));
-		break;
-	case 'p':
-		pid = atoi(EARGF(Usage()));
-		break;
-	case 'f':
-		functions = 1;
-		break;
-	case 'h':
-		histograms = 1;
-		break;
-	case 'l':
-		linenums = 1;
-		break;
-	case 'r':
-		registers = 1;
-		break;
-	case 's':
-		stacks++;
-		break;
-	default:
-		Usage();
-	}ARGEND
-	if(pid <= 0 && argc == 0)
-		Usage();
-	if(functions+linenums+registers+stacks+pprof == 0)
-		histograms = 1;
-	if(!machbyname("amd64")) {
-		fprint(2, "prof: no amd64 support\n", pid);
-		exit(1);
-	}
-	if(argc > 0)
-		file = argv[0];
-	else if(pid) {
-		file = proctextfile(pid);
-		if (file == NULL) {
-			fprint(2, "prof: can't find file for pid %d: %r\n", pid);
-			fprint(2, "prof: on Darwin, need to provide file name explicitly\n");
-			exit(1);
-		}
-	}
-	fd = open(file, 0);
-	if(fd < 0) {
-		fprint(2, "prof: can't open %s: %r\n", file);
-		exit(1);
-	}
-	if(crackhdr(fd, &fhdr)) {
-		have_syms = syminit(fd, &fhdr);
-		if(!have_syms) {
-			fprint(2, "prof: no symbols for %s: %r\n", file);
-		}
-	} else {
-		fprint(2, "prof: crack header for %s: %r\n", file);
-		exit(1);
-	}
-	if(pid <= 0)
-		pid = startprocess(argv);
-	attachproc(pid, &fhdr);	// initializes thread list
-	if(setarch() < 0) {
-		detach();
-		fprint(2, "prof: can't identify binary architecture for pid %d\n", pid);
-		exit(1);
-	}
-	if(getthreads() <= 0) {
-		detach();
-		fprint(2, "prof: can't find threads for pid %d\n", pid);
-		exit(1);
-	}
-	for(i = 0; i < nthread; i++)
-		ctlproc(thread[i], "start");
-	samples();
-	detach();
-	dumphistogram();
-	dumppprof();
-	exit(0);
-}