runtime/pprof: write profiles in protobuf format.

Added functions with suffix proto and stuff from pprof tool to translate to protobuf. Done as the profile proto is more extensible than the legacy pprof format and is pprof's preferred profile format. Large part was taken from https://github.com/google/pprof tool. Tested by hand and compared the result with translated by pprof tool, profiles are identical. Fixes #16093 Change-Id: I5acdb2809cab0d16ed4694fdaa7b8ddfd68df11e Reviewed-on: https://go-review.googlesource.com/30556 Run-TryBot: Michael Matloob <matloob@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Matloob <matloob@golang.org>

runtime/pprof: write profiles in protobuf format.
Added functions with suffix proto and stuff from pprof tool to translate to protobuf. Done as the profile proto is more extensible than the legacy pprof format and is pprof's preferred profile format. Large part was taken from https://github.com/google/pprof tool. Tested by hand and compared the result with translated by pprof tool, profiles are identical. Fixes #16093 Change-Id: I5acdb2809cab0d16ed4694fdaa7b8ddfd68df11e Reviewed-on: https://go-review.googlesource.com/30556 Run-TryBot: Michael Matloob <matloob@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Matloob <matloob@golang.org>
7d14401b · unknown · Michael Matloob · d70b0fe6 · 7d14401b · 7d14401b
Commit 7d14401b authored 8 years ago by unknown Committed by Michael Matloob 8 years ago
11 changed files
--- a/src/go/build/deps_test.go
+++ b/src/go/build/deps_test.go
@@ -170,12 +170,14 @@ var pkgDeps = map[string][]string{
 	"log": {"L1", "os", "fmt", "time"},

 	// Packages used by testing must be low-level (L2+fmt).
-	"regexp":         {"L2", "regexp/syntax"},
-	"regexp/syntax":  {"L2"},
-	"runtime/debug":  {"L2", "fmt", "io/ioutil", "os", "time"},
-	"runtime/pprof":  {"L2", "fmt", "os", "text/tabwriter"},
-	"runtime/trace":  {"L0"},
-	"text/tabwriter": {"L2"},
+	"regexp":                            {"L2", "regexp/syntax"},
+	"regexp/syntax":                     {"L2"},
+	"runtime/debug":                     {"L2", "fmt", "io/ioutil", "os", "time"},
+	"runtime/pprof/internal/profile":    {"L2"},
+	"runtime/pprof/internal/protopprof": {"L2", "fmt", "runtime/pprof/internal/profile", "os", "time"},
+	"runtime/pprof":                     {"L2", "fmt", "runtime/pprof/internal/profile", "runtime/pprof/internal/protopprof", "time"},
+	"runtime/trace":                     {"L0"},
+	"text/tabwriter":                    {"L2"},

 	"testing":          {"L2", "flag", "fmt", "os", "runtime/debug", "runtime/pprof", "runtime/trace", "time"},
 	"testing/iotest":   {"L2", "log"},

--- a/src/runtime/crash_cgo_test.go
+++ b/src/runtime/crash_cgo_test.go
@@ -271,10 +271,11 @@ func testCgoPprof(t *testing.T, buildArg, runArg string) {
 	if err != nil {
 		t.Fatal(err)
 	}
+
 	fn := strings.TrimSpace(string(got))
 	defer os.Remove(fn)

-	cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1", exe, fn))
+	cmd := testEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1", "-symbolize=force", exe, fn))

 	found := false
 	for i, e := range cmd.Env {

--- a/src/runtime/pprof/decoder_for_test.go
+++ b/src/runtime/pprof/decoder_for_test.go
--- a/src/runtime/pprof/internal/profile/encode.go
+++ b/src/runtime/pprof/internal/profile/encode.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package profile
+
+import (
+	"sort"
+)
+
+// preEncode populates the unexported fields used by encode (those
+// with suffix X) from the corresponding exported fields and builds
+// p.stringTable. The exported fields are left untouched.
+//
+// Strings are interned through addString; the empty string is added
+// first so that it always occupies index 0 of the string table.
+func (p *Profile) preEncode() {
+	strings := make(map[string]int)
+	addString(strings, "")
+
+	for _, st := range p.SampleType {
+		st.typeX = addString(strings, st.Type)
+		st.unitX = addString(strings, st.Unit)
+	}
+
+	for _, s := range p.Sample {
+		s.labelX = nil
+		// Map iteration order is random; sort the keys so that the
+		// emitted label order is the same on every run.
+		var keys []string
+		for k := range s.Label {
+			keys = append(keys, k)
+		}
+		sort.Strings(keys)
+		for _, k := range keys {
+			vs := s.Label[k]
+			for _, v := range vs {
+				s.labelX = append(s.labelX,
+					Label{
+						keyX: addString(strings, k),
+						strX: addString(strings, v),
+					},
+				)
+			}
+		}
+		// Numeric labels follow the string labels, again in sorted key order.
+		var numKeys []string
+		for k := range s.NumLabel {
+			numKeys = append(numKeys, k)
+		}
+		sort.Strings(numKeys)
+		for _, k := range numKeys {
+			vs := s.NumLabel[k]
+			for _, v := range vs {
+				s.labelX = append(s.labelX,
+					Label{
+						keyX: addString(strings, k),
+						numX: v,
+					},
+				)
+			}
+		}
+		// Samples reference their locations by ID.
+		s.locationIDX = nil
+		for _, l := range s.Location {
+			s.locationIDX = append(s.locationIDX, l.ID)
+		}
+	}
+
+	for _, m := range p.Mapping {
+		m.fileX = addString(strings, m.File)
+		m.buildIDX = addString(strings, m.BuildID)
+	}
+
+	for _, l := range p.Location {
+		// Line is a slice of values, so write through the index rather
+		// than through the loop copy ln.
+		for i, ln := range l.Line {
+			if ln.Function != nil {
+				l.Line[i].functionIDX = ln.Function.ID
+			} else {
+				l.Line[i].functionIDX = 0
+			}
+		}
+		if l.Mapping != nil {
+			l.mappingIDX = l.Mapping.ID
+		} else {
+			l.mappingIDX = 0
+		}
+	}
+	for _, f := range p.Function {
+		f.nameX = addString(strings, f.Name)
+		f.systemNameX = addString(strings, f.SystemName)
+		f.filenameX = addString(strings, f.Filename)
+	}
+
+	if pt := p.PeriodType; pt != nil {
+		pt.typeX = addString(strings, pt.Type)
+		pt.unitX = addString(strings, pt.Unit)
+	}
+
+	// Invert the interning map into a slice indexed by string number.
+	p.stringTable = make([]string, len(strings))
+	for s, i := range strings {
+		p.stringTable[i] = s
+	}
+}
+
+// encode appends the encoding of p to b. It reads p.stringTable and the
+// X-suffixed fields, so preEncode must have been called beforehand.
+func (p *Profile) encode(b *buffer) {
+	for _, x := range p.SampleType {
+		encodeMessage(b, 1, x)
+	}
+	for _, x := range p.Sample {
+		encodeMessage(b, 2, x)
+	}
+	for _, x := range p.Mapping {
+		encodeMessage(b, 3, x)
+	}
+	for _, x := range p.Location {
+		encodeMessage(b, 4, x)
+	}
+	for _, x := range p.Function {
+		encodeMessage(b, 5, x)
+	}
+	encodeStrings(b, 6, p.stringTable)
+	encodeInt64Opt(b, 9, p.TimeNanos)
+	encodeInt64Opt(b, 10, p.DurationNanos)
+	// Skip the period type when both of its string indexes are 0, i.e.
+	// when Type and Unit are both the empty string.
+	if pt := p.PeriodType; pt != nil && (pt.typeX != 0 || pt.unitX != 0) {
+		encodeMessage(b, 11, p.PeriodType)
+	}
+	encodeInt64Opt(b, 12, p.Period)
+}
+
+// encode appends the string-table indexes of the type and unit to b.
+// Zero indexes (the empty string) are omitted.
+func (p *ValueType) encode(b *buffer) {
+	encodeInt64Opt(b, 1, p.typeX)
+	encodeInt64Opt(b, 2, p.unitX)
+}
+
+// encode appends the sample's location IDs, values and labels to b.
+// Values are always written, including zeros.
+func (p *Sample) encode(b *buffer) {
+	encodeUint64s(b, 1, p.locationIDX)
+	for _, x := range p.Value {
+		encodeInt64(b, 2, x)
+	}
+	for _, x := range p.labelX {
+		encodeMessage(b, 3, x)
+	}
+}
+
+// encode appends the label's key index and its string index or numeric
+// value to b. Zero-valued fields are omitted.
+func (p Label) encode(b *buffer) {
+	encodeInt64Opt(b, 1, p.keyX)
+	encodeInt64Opt(b, 2, p.strX)
+	encodeInt64Opt(b, 3, p.numX)
+}
+
+// encode appends the mapping to b. File and BuildID are written as the
+// string-table indexes fileX and buildIDX; zero or false fields are omitted.
+func (p *Mapping) encode(b *buffer) {
+	encodeUint64Opt(b, 1, p.ID)
+	encodeUint64Opt(b, 2, p.Start)
+	encodeUint64Opt(b, 3, p.Limit)
+	encodeUint64Opt(b, 4, p.Offset)
+	encodeInt64Opt(b, 5, p.fileX)
+	encodeInt64Opt(b, 6, p.buildIDX)
+	encodeBoolOpt(b, 7, p.HasFunctions)
+	encodeBoolOpt(b, 8, p.HasFilenames)
+	encodeBoolOpt(b, 9, p.HasLineNumbers)
+	encodeBoolOpt(b, 10, p.HasInlineFrames)
+}
+
+// encode appends the location to b, referring to its mapping by ID
+// (mappingIDX) and emitting one nested message per line entry.
+func (p *Location) encode(b *buffer) {
+	encodeUint64Opt(b, 1, p.ID)
+	encodeUint64Opt(b, 2, p.mappingIDX)
+	encodeUint64Opt(b, 3, p.Address)
+	for i := range p.Line {
+		// Line.encode has a pointer receiver; take the element's address.
+		encodeMessage(b, 4, &p.Line[i])
+	}
+}
+
+// encode appends the function ID and line number to b, omitting zeros.
+func (p *Line) encode(b *buffer) {
+	encodeUint64Opt(b, 1, p.functionIDX)
+	encodeInt64Opt(b, 2, p.Line)
+}
+
+// encode appends the function to b. The name, system name and filename
+// are written as string-table indexes; zero-valued fields are omitted.
+func (p *Function) encode(b *buffer) {
+	encodeUint64Opt(b, 1, p.ID)
+	encodeInt64Opt(b, 2, p.nameX)
+	encodeInt64Opt(b, 3, p.systemNameX)
+	encodeInt64Opt(b, 4, p.filenameX)
+	encodeInt64Opt(b, 5, p.StartLine)
+}
+
+// addString interns s in the strings map and returns its index. A string
+// seen before keeps the index it was first given; a new string receives
+// the next free index, which is the current size of the map.
+func addString(strings map[string]int, s string) int64 {
+	if idx, found := strings[s]; found {
+		return int64(idx)
+	}
+	idx := len(strings)
+	strings[s] = idx
+	return int64(idx)
+}
--- a/src/runtime/pprof/internal/profile/profile.go
+++ b/src/runtime/pprof/internal/profile/profile.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package profile provides a representation of profile.proto and
+// methods to encode profiles in this format.
+package profile
+
+import (
+	"io"
+)
+
+// Profile is an in-memory representation of profile.proto.
+type Profile struct {
+	SampleType []*ValueType
+	Sample     []*Sample
+	Mapping    []*Mapping
+	Location   []*Location
+	Function   []*Function
+
+	TimeNanos     int64
+	DurationNanos int64
+	PeriodType    *ValueType
+	Period        int64
+
+	// stringTable is rebuilt by preEncode on every Write; strings in the
+	// encoded profile are referred to by their index in this table.
+	stringTable []string
+}
+
+// ValueType corresponds to Profile.ValueType
+type ValueType struct {
+	Type string // cpu, wall, inuse_space, etc
+	Unit string // seconds, nanoseconds, bytes, etc
+
+	// String-table indexes of Type and Unit, filled in by preEncode.
+	typeX int64
+	unitX int64
+}
+
+// Sample corresponds to Profile.Sample
+type Sample struct {
+	Location []*Location
+	Value    []int64
+	Label    map[string][]string
+	NumLabel map[string][]int64
+
+	// Encoded forms filled in by preEncode: the IDs of Location, and
+	// Label followed by NumLabel flattened in sorted key order.
+	locationIDX []uint64
+	labelX      []Label
+}
+
+// Label corresponds to Profile.Label
+type Label struct {
+	keyX int64 // String-table index of the label key
+	// Exactly one of the two following values must be set
+	strX int64 // String-table index of the label's string value
+	numX int64 // Integer value for this label
+}
+
+// Mapping corresponds to Profile.Mapping
+type Mapping struct {
+	ID              uint64
+	Start           uint64
+	Limit           uint64
+	Offset          uint64
+	File            string
+	BuildID         string
+	HasFunctions    bool
+	HasFilenames    bool
+	HasLineNumbers  bool
+	HasInlineFrames bool
+
+	// String-table indexes of File and BuildID, filled in by preEncode.
+	fileX    int64
+	buildIDX int64
+}
+
+// Location corresponds to Profile.Location
+type Location struct {
+	ID      uint64
+	Mapping *Mapping
+	Address uint64
+	Line    []Line
+
+	// mappingIDX is Mapping.ID, or 0 when Mapping is nil; set by preEncode.
+	mappingIDX uint64
+}
+
+// Line corresponds to Profile.Line
+type Line struct {
+	Function *Function
+	Line     int64
+
+	// functionIDX is Function.ID, or 0 when Function is nil; set by preEncode.
+	functionIDX uint64
+}
+
+// Function corresponds to Profile.Function
+type Function struct {
+	ID         uint64
+	Name       string
+	SystemName string
+	Filename   string
+	StartLine  int64
+
+	// String-table indexes of Name, SystemName and Filename, filled in
+	// by preEncode.
+	nameX       int64
+	systemNameX int64
+	filenameX   int64
+}
+
+// Write writes the profile to w as a marshaled protobuf. The output is
+// not compressed. It returns the error from w.Write, if any.
+func (p *Profile) Write(w io.Writer) error {
+	// preEncode fills in the string table and index fields read by encode.
+	p.preEncode()
+	var b buffer
+	p.encode(&b)
+	_, err := w.Write(b.data)
+	return err
+}
--- a/src/runtime/pprof/internal/profile/profile_memmap.go
+++ b/src/runtime/pprof/internal/profile/profile_memmap.go
+package profile
+
+import (
+	"bufio"
+	"errors"
+	"io"
+	"strconv"
+	"strings"
+)
+
+var errUnrecognized = errors.New("unrecognized profile format")
+
+// hasLibFile returns the shared-library suffix found in file, or "" if
+// there is none. A suffix is either ".so" at the very end of the name or
+// ".so" followed by '.' or '_' and at least one decimal digit, i.e. the
+// leftmost match of the pattern `[.]so$|[.]so[._][0-9]+`.
+//
+// Examples: "libc-2.19.so" yields ".so", "/lib/libc.so.6" yields ".so.6",
+// while "/usr/bin/console" and "libc.so." yield "".
+func hasLibFile(file string) string {
+	const ext = ".so"
+	for from := 0; ; {
+		ix := strings.Index(file[from:], ext)
+		if ix < 0 {
+			return ""
+		}
+		start := from + ix
+		end := start + len(ext)
+		if end == len(file) {
+			// ".so" terminates the name.
+			return file[start:end]
+		}
+		if c := file[end]; c == '.' || c == '_' {
+			// Consume the version digits, staying inside the string.
+			endalt := end + 1
+			for endalt < len(file) && file[endalt] >= '0' && file[endalt] <= '9' {
+				endalt++
+			}
+			if endalt > end+1 {
+				return file[start:endalt]
+			}
+		}
+		// Not a library suffix here; look for a later ".so".
+		from = start + 1
+	}
+}
+
+// massageMappings applies heuristic-based changes to the profile
+// mappings to account for quirks of some environments. It merges
+// adjacent mappings of the same file, moves the mapping guessed to be
+// the main binary to the front, and renumbers mapping IDs from 1.
+func (p *Profile) massageMappings() {
+	// Merge adjacent regions with matching names, checking that the offsets match
+	if len(p.Mapping) > 1 {
+		mappings := []*Mapping{p.Mapping[0]}
+		for _, m := range p.Mapping[1:] {
+			lm := mappings[len(mappings)-1]
+			// offset is the file offset at which lm ends; m continues lm
+			// only if it starts there, both in memory and in the file.
+			if offset := lm.Offset + (lm.Limit - lm.Start); lm.Limit == m.Start &&
+				offset == m.Offset &&
+				(lm.File == m.File || lm.File == "") {
+				lm.File = m.File
+				lm.Limit = m.Limit
+				if lm.BuildID == "" {
+					lm.BuildID = m.BuildID
+				}
+				// m is dropped; repoint any location that referenced it.
+				p.updateLocationMapping(m, lm)
+				continue
+			}
+			mappings = append(mappings, m)
+		}
+		p.Mapping = mappings
+	}
+
+	// Use heuristics to identify main binary and move it to the top of the list of mappings
+	for i, m := range p.Mapping {
+		file := strings.TrimSpace(strings.Replace(m.File, "(deleted)", "", -1))
+		if len(file) == 0 {
+			continue
+		}
+		// Shared libraries are not the main binary.
+		if len(hasLibFile(file)) > 0 {
+			continue
+		}
+		// Names starting with '[' are skipped as well.
+		if strings.HasPrefix(file, "[") {
+			continue
+		}
+		// Swap what we guess is main to position 0.
+		p.Mapping[0], p.Mapping[i] = p.Mapping[i], p.Mapping[0]
+		break
+	}
+
+	// Keep the mapping IDs neatly sorted
+	for i, m := range p.Mapping {
+		m.ID = uint64(i + 1)
+	}
+}
+
+// updateLocationMapping repoints every location whose mapping is from
+// so that it refers to to instead. Mappings are compared by pointer.
+func (p *Profile) updateLocationMapping(from, to *Mapping) {
+	for _, l := range p.Location {
+		if l.Mapping == from {
+			l.Mapping = to
+		}
+	}
+}
+
+// remapLocationIDs rebuilds p.Location from the locations referenced
+// by the samples, in order of first appearance, and assigns them
+// consecutive IDs starting at 1. Locations that no sample references
+// are dropped from p.Location.
+func (p *Profile) remapLocationIDs() {
+	seen := make(map[*Location]bool, len(p.Location))
+	var locs []*Location
+
+	for _, s := range p.Sample {
+		for _, l := range s.Location {
+			if seen[l] {
+				continue
+			}
+			l.ID = uint64(len(locs) + 1)
+			locs = append(locs, l)
+			seen[l] = true
+		}
+	}
+	p.Location = locs
+}
+
+// remapFunctionIDs rebuilds p.Function from the functions referenced by
+// the lines of p.Location, in order of first appearance, and assigns
+// them consecutive IDs starting at 1. Functions that no location
+// references are dropped from p.Function.
+func (p *Profile) remapFunctionIDs() {
+	seen := make(map[*Function]bool, len(p.Function))
+	var fns []*Function
+
+	for _, l := range p.Location {
+		for _, ln := range l.Line {
+			fn := ln.Function
+			if fn == nil || seen[fn] {
+				continue
+			}
+			fn.ID = uint64(len(fns) + 1)
+			fns = append(fns, fn)
+			seen[fn] = true
+		}
+	}
+	p.Function = fns
+}
+
+// remapMappingIDs matches location addresses with existing mappings
+// and updates them appropriately. This is O(N*M), if this ever shows
+// up as a bottleneck, evaluate sorting the mappings and doing a
+// binary search, which would make it O(N*log(M)).
+//
+// Locations that already have a mapping, or whose address is 0, are
+// left alone. Mapping IDs are renumbered from 1 at the end.
+func (p *Profile) remapMappingIDs() {
+	// Some profile handlers will incorrectly set regions for the main
+	// executable if its section is remapped. Fix them through heuristics.
+
+	if len(p.Mapping) > 0 {
+		// Remove the initial mapping if named '/anon_hugepage' and has a
+		// consecutive adjacent mapping.
+		if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
+			if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
+				p.Mapping = p.Mapping[1:]
+			}
+		}
+	}
+
+	// Subtract the offset from the start of the main mapping if it
+	// ends up at a recognizable start address.
+	if len(p.Mapping) > 0 {
+		const expectedStart = 0x400000
+		if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
+			m.Start = expectedStart
+			m.Offset = 0
+		}
+	}
+
+	// Associate each location with an address to the corresponding
+	// mapping. Create fake mapping if a suitable one isn't found.
+	var fake *Mapping
+nextLocation:
+	for _, l := range p.Location {
+		a := l.Address
+		if l.Mapping != nil || a == 0 {
+			continue
+		}
+		// First choice: a mapping whose [Start, Limit) range holds a.
+		for _, m := range p.Mapping {
+			if m.Start <= a && a < m.Limit {
+				l.Mapping = m
+				continue nextLocation
+			}
+		}
+		// Work around legacy handlers failing to encode the first
+		// part of mappings split into adjacent ranges.
+		for _, m := range p.Mapping {
+			if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start {
+				// Extend the mapping downwards to cover the missing part.
+				m.Start -= m.Offset
+				m.Offset = 0
+				l.Mapping = m
+				continue nextLocation
+			}
+		}
+		// If there is still no mapping, create a fake one.
+		// This is important for the Go legacy handler, which produced
+		// no mappings.
+		if fake == nil {
+			fake = &Mapping{
+				ID:    1,
+				Limit: ^uint64(0),
+			}
+			p.Mapping = append(p.Mapping, fake)
+		}
+		l.Mapping = fake
+	}
+
+	// Reset all mapping IDs.
+	for i, m := range p.Mapping {
+		m.ID = uint64(i + 1)
+	}
+}
+
+// RemapAll renumbers the profile's locations, functions and mappings.
+// The order matters: locations are first reduced to those referenced by
+// samples, functions are then collected from those locations, and
+// finally each remaining location is associated with a mapping.
+func (p *Profile) RemapAll() {
+	p.remapLocationIDs()
+	p.remapFunctionIDs()
+	p.remapMappingIDs()
+}
+
+// ParseProcMaps parses a memory map in the format of /proc/self/maps
+// and returns the executable mappings it contains, in input order.
+// Use (*Profile).ParseMemoryMap to install the result on a profile and
+// associate its locations with the corresponding mapping based on
+// their address.
+//
+// Lines that are not mapping entries are ignored, except that a line of
+// the form attr=value makes $attr expand to value on subsequent lines.
+// A read error other than io.EOF is returned as is.
+func ParseProcMaps(rd io.Reader) ([]*Mapping, error) {
+	var mapping []*Mapping
+
+	b := bufio.NewReader(rd)
+
+	var attrs []string
+	var r *strings.Replacer
+	const delimiter = "="
+	for {
+		l, err := b.ReadString('\n')
+		if err != nil {
+			if err != io.EOF {
+				return nil, err
+			}
+			// At EOF, still process a final line that lacks a newline.
+			if l == "" {
+				break
+			}
+		}
+		if l = strings.TrimSpace(l); l == "" {
+			continue
+		}
+
+		// Expand any $attr references collected so far.
+		if r != nil {
+			l = r.Replace(l)
+		}
+		m, err := parseMappingEntry(l)
+		if err != nil {
+			if err == errUnrecognized {
+				// Recognize assignments of the form: attr=value, and replace
+				// $attr with value on subsequent mappings.
+				if attr := strings.SplitN(l, delimiter, 2); len(attr) == 2 {
+					attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
+					r = strings.NewReplacer(attrs...)
+				}
+				// Ignore any unrecognized entries
+				continue
+			}
+			return nil, err
+		}
+		// A nil mapping without error is an entry to skip (non-executable).
+		if m == nil {
+			continue
+		}
+		mapping = append(mapping, m)
+	}
+	return mapping, nil
+}
+
+// ParseMemoryMap parses a memory map in the format of
+// /proc/self/maps and appends the resulting mappings to those already
+// in the profile. It then massages the mappings and renumbers the
+// locations, functions and mappings in the profile correspondingly.
+// The profile is left unchanged if parsing fails.
+func (p *Profile) ParseMemoryMap(rd io.Reader) error {
+	mapping, err := ParseProcMaps(rd)
+	if err != nil {
+		return err
+	}
+	p.Mapping = append(p.Mapping, mapping...)
+	p.massageMappings()
+	p.RemapAll()
+	return nil
+}
+
+// parseMappingEntry parses one line of a /proc/self/maps style memory
+// map. Only lines made of exactly six whitespace-separated fields are
+// considered: address range, permissions, offset, two fields that are
+// not inspected, and the file name.
+//
+// It returns (nil, nil) for an entry whose permissions lack "x", and
+// (nil, errUnrecognized) for a line that does not have six fields, whose
+// address range is not of the form start-limit, or whose addresses or
+// offset are not hexadecimal numbers.
+func parseMappingEntry(l string) (*Mapping, error) {
+	fields := strings.Fields(l)
+	if len(fields) != 6 {
+		return nil, errUnrecognized
+	}
+	if !strings.Contains(fields[1], "x") {
+		// Skip non-executable entries.
+		return nil, nil
+	}
+	// Guard against a first field without '-': indexing addrRange[1]
+	// would otherwise panic instead of reporting the line as unrecognized.
+	addrRange := strings.Split(fields[0], "-")
+	if len(addrRange) != 2 {
+		return nil, errUnrecognized
+	}
+
+	mapping := &Mapping{}
+	var err error
+	if mapping.Start, err = strconv.ParseUint(addrRange[0], 16, 64); err != nil {
+		return nil, errUnrecognized
+	}
+	if mapping.Limit, err = strconv.ParseUint(addrRange[1], 16, 64); err != nil {
+		return nil, errUnrecognized
+	}
+	// strings.Fields never yields an empty field, so the offset is
+	// always present and can be parsed unconditionally.
+	if mapping.Offset, err = strconv.ParseUint(fields[2], 16, 64); err != nil {
+		return nil, errUnrecognized
+	}
+	mapping.File = fields[5]
+	return mapping, nil
+}
--- a/src/runtime/pprof/internal/profile/proto.go
+++ b/src/runtime/pprof/internal/profile/proto.go
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file is a simple protocol buffer encoder.
+//
+// A protocol message must implement the message interface:
+//   encode(*buffer)
+//
+// The encode method encodes its receiver into the given buffer.
+//
+// The method is simple enough to be implemented by hand rather than
+// by using a protocol compiler.
+//
+// See encode.go for examples of messages implementing this interface.
+//
+// There is no support for decoding, groups, message sets, or "has" bits.
+
+package profile
+
+// buffer accumulates the encoded bytes of a message.
+type buffer struct {
+	// field, typ and u64 are not referenced by the encoders in this
+	// file; presumably decoder state - verify before removing.
+	field int
+	typ   int
+	u64   uint64
+	data  []byte   // encoded output so far
+	tmp   [16]byte // scratch space for moving a length header in front of its payload
+}
+
+// message is implemented by every type that can be written as a nested
+// protocol buffer message through encodeMessage.
+type message interface {
+	// encode appends the fields of the receiver to the buffer.
+	encode(*buffer)
+}
+
+// encodeVarint appends x to b.data as a base-128 varint: seven bits per
+// byte, least significant group first, with the high bit set on every
+// byte except the last.
+func encodeVarint(b *buffer, x uint64) {
+	for ; x >= 128; x >>= 7 {
+		b.data = append(b.data, byte(x)|0x80)
+	}
+	b.data = append(b.data, byte(x))
+}
+
+// encodeLength appends the header of a length-delimited field: the key
+// (tag shifted left by three bits, with wire type 2) followed by the
+// payload length, both as varints.
+func encodeLength(b *buffer, tag int, len int) {
+	encodeVarint(b, uint64(tag)<<3|2)
+	encodeVarint(b, uint64(len))
+}
+
+// encodeUint64 appends x as a varint field: the key (tag shifted left
+// by three bits, with wire type 0) followed by the value.
+func encodeUint64(b *buffer, tag int, x uint64) {
+	// append varint to b.data
+	encodeVarint(b, uint64(tag)<<3|0)
+	encodeVarint(b, x)
+}
+
+// encodeUint64s appends the values of x under the given tag. More than
+// two values are written as one length-delimited run of varints (packed
+// encoding); otherwise each value is written as its own varint field.
+func encodeUint64s(b *buffer, tag int, x []uint64) {
+	if len(x) > 2 {
+		// Use packed encoding
+		// The payload length is only known once the values are encoded,
+		// so the payload is written first, the header is appended after
+		// it, and the two are then swapped in place using b.tmp.
+		n1 := len(b.data)
+		for _, u := range x {
+			encodeVarint(b, u)
+		}
+		n2 := len(b.data)
+		encodeLength(b, tag, n2-n1)
+		n3 := len(b.data)
+		// NOTE(review): relies on the header (n3-n2 bytes) fitting in the
+		// 16-byte tmp array - confirm for large tags.
+		copy(b.tmp[:], b.data[n2:n3])
+		copy(b.data[n1+(n3-n2):], b.data[n1:n2])
+		copy(b.data[n1:], b.tmp[:n3-n2])
+		return
+	}
+	for _, u := range x {
+		encodeUint64(b, tag, u)
+	}
+}
+
+// encodeUint64Opt appends x as a varint field with the given tag,
+// leaving the field out entirely when x is zero.
+func encodeUint64Opt(b *buffer, tag int, x uint64) {
+	if x != 0 {
+		encodeUint64(b, tag, x)
+	}
+}
+
+// encodeInt64 appends x as a varint field with the given tag. The value
+// is converted to uint64 unchanged, without zig-zag encoding.
+func encodeInt64(b *buffer, tag int, x int64) {
+	encodeUint64(b, tag, uint64(x))
+}
+
+// encodeInt64Opt appends x as a varint field with the given tag,
+// leaving the field out entirely when x is zero.
+func encodeInt64Opt(b *buffer, tag int, x int64) {
+	if x != 0 {
+		encodeInt64(b, tag, x)
+	}
+}
+
+// encodeInt64s appends the values of x under the given tag. More than
+// two values are written as one length-delimited run of varints (packed
+// encoding); otherwise each value is written as its own varint field.
+func encodeInt64s(b *buffer, tag int, x []int64) {
+	if len(x) > 2 {
+		// Use packed encoding
+		// Write the payload, append its header, then swap the two in
+		// place so that the header comes first.
+		n1 := len(b.data)
+		for _, u := range x {
+			encodeVarint(b, uint64(u))
+		}
+		n2 := len(b.data)
+		encodeLength(b, tag, n2-n1)
+		n3 := len(b.data)
+		// NOTE(review): relies on the header (n3-n2 bytes) fitting in the
+		// 16-byte tmp array - confirm for large tags.
+		copy(b.tmp[:], b.data[n2:n3])
+		copy(b.data[n1+(n3-n2):], b.data[n1:n2])
+		copy(b.data[n1:], b.tmp[:n3-n2])
+		return
+	}
+	for _, u := range x {
+		encodeInt64(b, tag, u)
+	}
+}
+
+// encodeString appends x as a length-delimited field with the given
+// tag. The field is written even when x is empty.
+func encodeString(b *buffer, tag int, x string) {
+	encodeLength(b, tag, len(x))
+	b.data = append(b.data, x...)
+}
+
+// encodeStrings appends every string of x, in order, as a separate
+// length-delimited field with the given tag. Empty strings are kept so
+// that positions in the list are preserved.
+func encodeStrings(b *buffer, tag int, x []string) {
+	for _, s := range x {
+		encodeString(b, tag, s)
+	}
+}
+
+// encodeStringOpt appends x as a length-delimited field with the given
+// tag, leaving the field out entirely when x is empty.
+func encodeStringOpt(b *buffer, tag int, x string) {
+	if x != "" {
+		encodeString(b, tag, x)
+	}
+}
+
+// encodeBool appends x as a varint field with the given tag, using 1
+// for true and 0 for false.
+func encodeBool(b *buffer, tag int, x bool) {
+	var v uint64
+	if x {
+		v = 1
+	}
+	encodeUint64(b, tag, v)
+}
+
+// encodeBoolOpt appends x as a varint field with the given tag, leaving
+// the field out entirely when x is false.
+func encodeBoolOpt(b *buffer, tag int, x bool) {
+	if x {
+		encodeBool(b, tag, x)
+	}
+}
+
+// encodeMessage appends m as a length-delimited nested message with the
+// given tag.
+func encodeMessage(b *buffer, tag int, m message) {
+	// The length of m is only known after encoding it, so the body is
+	// written first, its header is appended after it, and the two are
+	// then swapped in place using b.tmp as scratch space.
+	n1 := len(b.data)
+	m.encode(b)
+	n2 := len(b.data)
+	encodeLength(b, tag, n2-n1)
+	n3 := len(b.data)
+	// NOTE(review): relies on the header (n3-n2 bytes) fitting in the
+	// 16-byte tmp array - confirm for large tags.
+	copy(b.tmp[:], b.data[n2:n3])
+	copy(b.data[n1+(n3-n2):], b.data[n1:n2])
+	copy(b.data[n1:], b.tmp[:n3-n2])
+}
--- a/src/runtime/pprof/internal/protopprof/protopprof.go
+++ b/src/runtime/pprof/internal/protopprof/protopprof.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
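+// license that can be found in the LICENSE file.
+
+// Package protopprof converts legacy binary CPU profiles into the
+// in-memory profile representation of runtime/pprof/internal/profile.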
+package protopprof
+
+import (
+	"fmt"
+	"os"
+	"runtime"
+	"strings"
+	"time"
+
+	"runtime/pprof/internal/profile"
+)
+
+// Copied from encoding/binary package, which can't be imported due to
+// dependency cycles
+
+// lEndian decodes unsigned integers stored in little-endian byte order.
+var lEndian littleEndian
+
+// bEndian decodes unsigned integers stored in big-endian byte order.
+var bEndian bigEndian
+
+type littleEndian struct{}
+type bigEndian struct{}
+
+// uint32 decodes the first four bytes of b as a big-endian value.
+// It panics if b is shorter than four bytes.
+func (bigEndian) uint32(b []byte) uint32 {
+	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
+}
+
+// uint64 decodes the first eight bytes of b as a big-endian value.
+// It panics if b is shorter than eight bytes.
+func (bigEndian) uint64(b []byte) uint64 {
+	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 |
+		uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56
+}
+
+// uint32 decodes the first four bytes of b as a little-endian value.
+// It panics if b is shorter than four bytes.
+func (littleEndian) uint32(b []byte) uint32 {
+	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+// uint64 decodes the first eight bytes of b as a little-endian value.
+// It panics if b is shorter than eight bytes.
+func (littleEndian) uint64(b []byte) uint64 {
+	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+// big32 reads a big-endian 32-bit word from the front of b and returns
+// it together with the remaining bytes. It returns (0, nil) when b holds
+// fewer than four bytes, which includes a nil b.
+func big32(b []byte) (uint64, []byte) {
+	if len(b) < 4 {
+		return 0, nil
+	}
+	return uint64(bEndian.uint32(b)), b[4:]
+}
+
+// little32 reads a little-endian 32-bit word from the front of b and
+// returns it together with the remaining bytes. It returns (0, nil) when
+// b holds fewer than four bytes, which includes a nil b.
+func little32(b []byte) (uint64, []byte) {
+	if len(b) < 4 {
+		return 0, nil
+	}
+	return uint64(lEndian.uint32(b)), b[4:]
+}
+
+// big64 reads a big-endian 64-bit word from the front of b and returns
+// it together with the remaining bytes. It returns (0, nil) when b holds
+// fewer than eight bytes, which includes a nil b.
+func big64(b []byte) (uint64, []byte) {
+	if len(b) < 8 {
+		return 0, nil
+	}
+	return bEndian.uint64(b), b[8:]
+}
+
+// little64 reads a little-endian 64-bit word from the front of b and
+// returns it together with the remaining bytes. It returns (0, nil) when
+// b holds fewer than eight bytes, which includes a nil b.
+func little64(b []byte) (uint64, []byte) {
+	if len(b) < 8 {
+		return 0, nil
+	}
+	return lEndian.uint64(b), b[8:]
+}
+
+// End of copy from encoding/binary package
+
+// parser reads one native integer from the front of a buffer and
+// returns it with the rest of the buffer. The parsers in this file
+// return a nil rest when the buffer is too short.
+type parser func([]byte) (uint64, []byte)
+
+// parsers lists the candidate word decoders in the order in which
+// findParser tries them.
+var parsers = []parser{
+	big32,
+	big64,
+	little32,
+	little64,
+}
+
+// findParser returns a parsing function to parse native integers from a
+// buffer, or nil if none of the candidates in parsers fits. The word
+// size and byte order are detected from the header in b.
+func findParser(b []byte) parser {
+	for _, p := range parsers {
+		// If the second word decodes as 3, we have the right parser.
+		_, rest := p(b) // first word
+		n, _ := p(rest) // second word
+		if n == 3 {
+			return p
+		}
+	}
+	return nil
+}
+
+// decodeHeader parses binary CPU profiling stack trace data
+// generated by runtime.CPUProfile() and returns the sample period,
+// the rest of the profile and a parse function for parsing the profile. The
+// function detects whether the legacy profile format is in little or big
+// endian and whether it was generated by a 32-bit or 64-bit machine.
+//
+// An error is returned when b is shorter than 12 bytes, when no parser
+// recognizes the header, or when b ends before the five header words.
+func decodeHeader(b []byte) (period uint64, parse parser, rest []byte, err error) {
+	const minRawProfile = 12 // Need a minimum of 3 words, at least 32-bit each.
+	if len(b) < minRawProfile {
+		return 0, nil, nil, fmt.Errorf("truncated raw profile: len %d", len(b))
+	}
+	if parse = findParser(b); parse == nil {
+		return 0, nil, nil, fmt.Errorf("cannot parse raw profile: header %v", b[:minRawProfile])
+	}
+	// skip 5-word header; 4th word is period
+	_, rest = parse(b)
+	_, rest = parse(rest)
+	_, rest = parse(rest)
+	period, rest = parse(rest)
+	_, rest = parse(rest)
+	// A parser returns a nil rest once it runs out of bytes and keeps
+	// returning nil afterwards, so a single check after the last word
+	// detects a short header.
+	if rest == nil {
+		return 0, nil, nil, fmt.Errorf("profile too short")
+	}
+	return period, parse, rest, nil
+}
+
+// TranslateCPUProfile parses binary CPU profiling stack trace data
+// generated by runtime.CPUProfile() into a profile struct.
+//
+// b is the raw legacy profile including its header. startTime is
+// recorded as the profile's TimeNanos and the time elapsed since
+// startTime as its DurationNanos. Every record yields one sample with
+// two values: the sample count and the count multiplied by the period.
+//
+// An error is returned when the header cannot be decoded, when a record
+// is truncated, or when the data ends without the end-of-data marker.
+// On Linux the memory mappings read from /proc/self/maps are attached
+// to the profile before it is returned.
+func TranslateCPUProfile(b []byte, startTime time.Time) (*profile.Profile, error) {
+	// minWordSize is the size in bytes of the smallest word any of the
+	// parsers consumes. It bounds the stack depth a record can claim.
+	const minWordSize = 4
+
+	// Get the sample period from the header.
+	var n4 uint64
+	var getInt parser
+	var err error
+	n4, getInt, b, err = decodeHeader(b)
+	if err != nil {
+		return nil, err
+	}
+
+	// profile initialization taken from pprof tool
+	p := &profile.Profile{
+		Period:     int64(n4) * 1000,
+		PeriodType: &profile.ValueType{Type: "cpu", Unit: "nanoseconds"},
+		SampleType: []*profile.ValueType{
+			{Type: "samples", Unit: "count"},
+			{Type: "cpu", Unit: "nanoseconds"},
+		},
+		TimeNanos:     int64(startTime.UnixNano()),
+		DurationNanos: time.Since(startTime).Nanoseconds(),
+	}
+	// Parse CPU samples from the profile.
+	locs := make(map[uint64]*profile.Location)
+	for len(b) > 0 {
+		var count, nstk uint64
+		count, b = getInt(b)
+		nstk, b = getInt(b)
+		if b == nil {
+			return nil, fmt.Errorf("unrecognized profile format")
+		}
+		// A record cannot hold more addresses than there are words left.
+		// Checking this before allocating keeps a corrupt depth from
+		// requesting an arbitrarily large slice.
+		if nstk > uint64(len(b))/minWordSize {
+			return nil, fmt.Errorf("unrecognized profile format")
+		}
+		addrs := make([]uint64, nstk)
+		for i := range addrs {
+			addrs[i], b = getInt(b)
+			// getInt signals a short read with a nil remainder. Check it
+			// after every word, including the last one, so that a
+			// truncated record is never mistaken for the end marker.
+			if b == nil {
+				return nil, fmt.Errorf("unrecognized profile format")
+			}
+		}
+		// End of data marker, can return
+		if count == 0 && nstk == 1 && addrs[0] == 0 {
+			if runtime.GOOS == "linux" {
+				if err := addMappings(p); err != nil {
+					return nil, err
+				}
+			}
+			return p, nil
+		}
+		sloc := make([]*profile.Location, 0, len(addrs))
+		for i, addr := range addrs {
+			// Addresses from stack traces point to the next instruction after
+			// each call.  Adjust by -1 to land somewhere on the actual call
+			// (except for the leaf, which is not a call).
+			if i > 0 {
+				addr--
+			}
+			// Locations are shared between samples, keyed by address.
+			loc := locs[addr]
+			if loc == nil {
+				loc = &profile.Location{
+					ID:      uint64(len(p.Location) + 1),
+					Address: addr,
+				}
+				locs[addr] = loc
+				p.Location = append(p.Location, loc)
+			}
+			sloc = append(sloc, loc)
+		}
+		p.Sample = append(p.Sample, &profile.Sample{
+			Value:    []int64{int64(count), int64(count) * p.Period},
+			Location: sloc,
+		})
+	}
+
+	// The data ran out before the end-of-data marker was seen.
+	return nil, fmt.Errorf("unrecognized profile format")
+}
+
+// addMappings reads /proc/self/maps and hands it to p.ParseMemoryMap,
+// which adds the mappings to p. It returns the error from opening the
+// file or from parsing it.
+func addMappings(p *profile.Profile) error {
+	// Parse memory map from /proc/self/maps
+	f, err := os.Open("/proc/self/maps")
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	return p.ParseMemoryMap(f)
+}
+
+// Symbolize adds function names, file names and line numbers to the
+// locations of p, resolving each address with runtime.FuncForPC, and
+// then trims runtime frames from the samples. Locations whose address
+// does not resolve to a function are left without line information.
+func Symbolize(p *profile.Profile) {
+	fns := profileFunctionMap{}
+	for _, l := range p.Location {
+		pc := uintptr(l.Address)
+		f := runtime.FuncForPC(pc)
+		if f == nil {
+			continue
+		}
+		file, lineno := f.FileLine(pc)
+		// Record on the mapping which kinds of symbol data were found.
+		if l.Mapping != nil {
+			if f.Name() != "" {
+				l.Mapping.HasFunctions = true
+			}
+			if file != "" {
+				l.Mapping.HasFilenames = true
+			}
+			if lineno != 0 {
+				l.Mapping.HasLineNumbers = true
+			}
+		}
+		l.Line = []profile.Line{
+			{
+				Function: fns.findOrAddFunction(f.Name(), file, p),
+				Line:     int64(lineno),
+			},
+		}
+	}
+
+	// Trim runtime functions. Always hide runtime.goexit. Other runtime
+	// functions are only hidden for heapz when they appear at the beginning.
+	isHeapz := p.PeriodType != nil && p.PeriodType.Type == "space"
+	for _, s := range p.Sample {
+		show := !isHeapz
+		// Filter s.Location in place; i counts the locations kept so far.
+		var i int
+		for _, l := range s.Location {
+			if (len(l.Line) > 0) && (l.Line[0].Function != nil) {
+				name := l.Line[0].Function.Name
+				if (name == "runtime.goexit") || (!show && strings.HasPrefix(name, "runtime.")) {
+					continue
+				}
+			}
+			// Once one frame is kept, later runtime frames are kept too.
+			show = true
+			s.Location[i] = l
+			i++
+		}
+		s.Location = s.Location[:i]
+	}
+}
+
+// profileFunctionMap indexes the functions already added to a profile.
+// The key is a Function value holding only Name, SystemName and
+// Filename, so lookups do not depend on the assigned ID.
+type profileFunctionMap map[profile.Function]*profile.Function
+
+// findOrAddFunction returns the function with the given name and
+// filename, creating it and appending it to p.Function on first use.
+// A new function gets the next ID after those already in p.Function,
+// and its SystemName is set to name.
+func (fns profileFunctionMap) findOrAddFunction(name, filename string, p *profile.Profile) *profile.Function {
+	f := profile.Function{
+		Name:       name,
+		SystemName: name,
+		Filename:   filename,
+	}
+	if fp := fns[f]; fp != nil {
+		return fp
+	}
+	fp := new(profile.Function)
+	// Index under the ID-less value, before the ID is assigned below.
+	fns[f] = fp
+
+	*fp = f
+	fp.ID = uint64(len(p.Function) + 1)
+	p.Function = append(p.Function, fp)
+	return fp
+}
+
+// CleanupDuplicateLocations removes the second location of every sample
+// whose first two locations refer to the same address once the -1
+// adjustment of the second one is undone. The sample's location slice is
+// modified in place.
+func CleanupDuplicateLocations(p *profile.Profile) {
+	// The profile handler may duplicate the leaf frame, because it gets
+	// its address both from stack unwinding and from the signal
+	// context. Detect this and delete the duplicate, which has been
+	// adjusted by -1. The leaf address should not be adjusted as it is
+	// not a call.
+	for _, s := range p.Sample {
+		if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 {
+			s.Location = append(s.Location[:1], s.Location[2:]...)
+		}
+	}
+}
--- a/src/runtime/pprof/mprof_test.go
+++ b/src/runtime/pprof/mprof_test.go
@@ -6,8 +6,8 @@ package pprof_test

 import (
 	"bytes"
-	"fmt"
-	"regexp"
+	"math"
+	"reflect"
 	"runtime"
 	. "runtime/pprof"
 	"testing"
@@ -71,26 +71,48 @@ func TestMemoryProfiler(t *testing.T) {

 	memoryProfilerRun++

-	tests := []string{
-		fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.allocatePersistent1K\+0x[0-9,a-f]+	.*/runtime/pprof/mprof_test\.go:40
-#	0x[0-9,a-f]+	runtime/pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+	.*/runtime/pprof/mprof_test\.go:63
-`, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun),
-
-		fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.allocateTransient1M\+0x[0-9,a-f]+	.*/runtime/pprof/mprof_test.go:21
-#	0x[0-9,a-f]+	runtime/pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+	.*/runtime/pprof/mprof_test.go:61
-`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),
-
-		fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.allocateTransient2M\+0x[0-9,a-f]+	.*/runtime/pprof/mprof_test.go:27
-#	0x[0-9,a-f]+	runtime/pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+	.*/runtime/pprof/mprof_test.go:62
-`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
+	r := bytes.NewReader(buf.Bytes())
+	p, err := Parse(r)
+	if err != nil {
+		t.Fatalf("can't parse pprof profile: %v", err)
 	}
-
-	for _, test := range tests {
-		if !regexp.MustCompile(test).Match(buf.Bytes()) {
-			t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test, buf.String())
+	if len(p.Sample) < 3 {
+		t.Fatalf("few samples, got: %d", len(p.Sample))
+	}
+	testSample := make(map[int][]int64)
+	testSample[0] = scaleHeapSample((int64)(32*memoryProfilerRun), (int64)(1024*memoryProfilerRun), p.Period)
+	testSample[0] = append(testSample[0], testSample[0][0], testSample[0][1])
+	testSample[1] = scaleHeapSample((int64)((1<<10)*memoryProfilerRun), (int64)((1<<20)*memoryProfilerRun), p.Period)
+	testSample[1] = append([]int64{0, 0}, testSample[1][0], testSample[1][1])
+	testSample[2] = scaleHeapSample((int64)(memoryProfilerRun), (int64)((2<<20)*memoryProfilerRun), p.Period)
+	testSample[2] = append([]int64{0, 0}, testSample[2][0], testSample[2][1])
+	for _, value := range testSample {
+		found := false
+		for i := range p.Sample {
+			if reflect.DeepEqual(p.Sample[i].Value, value) {
+				found = true
+				break
+			}
+		}
+		if !found {
+			t.Fatalf("the entry did not match any sample:\n%v\n", value)
 		}
 	}
 }
+
+// scaleHeapSample computes the expected {count, size} pair for a heap
+// sample collected at the given sampling rate. Empty samples yield {0, 0};
+// a rate of 1 or less leaves the values unscaled.
+func scaleHeapSample(count, size, rate int64) []int64 {
+	switch {
+	case count == 0 || size == 0:
+		return []int64{0, 0}
+	case rate <= 1:
+		// rate==1 means every allocation was sampled; rate<1 is treated
+		// as unknown. Neither case is scaled.
+		return []int64{count, size}
+	}
+
+	meanSize := float64(size) / float64(count)
+	factor := 1 / (1 - math.Exp(-meanSize/float64(rate)))
+
+	scaledCount := int64(float64(count) * factor)
+	scaledSize := int64(float64(size) * factor)
+	return []int64{scaledCount, scaledSize}
+}
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@@ -70,16 +70,17 @@
 package pprof

 import (
-	"bufio"
 	"bytes"
 	"fmt"
 	"io"
-	"os"
+	"math"
 	"runtime"
 	"sort"
-	"strings"
 	"sync"
-	"text/tabwriter"
+	"time"
+
+	"runtime/pprof/internal/profile"
+	"runtime/pprof/internal/protopprof"
 )

 // BUG(rsc): Profiles are only as good as the kernel support used to generate them.
@@ -279,19 +280,14 @@ func (p *Profile) Remove(value interface{}) {
 	delete(p.m, value)
 }

-// WriteTo writes a pprof-formatted snapshot of the profile to w.
+// WriteTo writes a protobuf-formatted snapshot of the profile to w.
 // If a write to w returns an error, WriteTo returns that error.
 // Otherwise, WriteTo returns nil.
 //
-// The debug parameter enables additional output.
-// Passing debug=0 prints only the hexadecimal addresses that pprof needs.
-// Passing debug=1 adds comments translating addresses to function names
-// and line numbers, so that a programmer can read the profile without tools.
-//
-// The predefined profiles may assign meaning to other debug values;
-// for example, when printing the "goroutine" profile, debug=2 means to
-// print the goroutine stacks in the same form that a Go program uses
-// when dying due to an unrecovered panic.
+// The debug parameter controls whether names are added to locations.
+// Passing debug=0 writes bare locations.
+// Passing debug=1 additionally translates addresses to function names
+// and line numbers.
 func (p *Profile) WriteTo(w io.Writer, debug int) error {
 	if p.name == "" {
 		panic("pprof: use of zero Profile")
@@ -338,34 +334,31 @@ type countProfile interface {
 	Stack(i int) []uintptr
 }

+// makeKey renders a stack as a string suitable for use as a map key:
+// an "@" followed by each program counter in 0x-prefixed hexadecimal,
+// each preceded by a single space.
+func makeKey(stk []uintptr) string {
+	var key bytes.Buffer
+	key.WriteByte('@')
+	for i := range stk {
+		fmt.Fprintf(&key, " %#x", stk[i])
+	}
+	return key.String()
+}
+
 // printCountProfile prints a countProfile at the specified debug level.
 func printCountProfile(w io.Writer, debug int, name string, p countProfile) error {
-	b := bufio.NewWriter(w)
-	var tw *tabwriter.Writer
-	w = b
-	if debug > 0 {
-		tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
-		w = tw
+	prof := &profile.Profile{
+		PeriodType: &profile.ValueType{Type: name, Unit: "count"},
+		Period:     1,
+		SampleType: []*profile.ValueType{{Type: name, Unit: "count"}},
 	}
+	locations := make(map[uint64]*profile.Location)

-	fmt.Fprintf(w, "%s profile: total %d\n", name, p.Len())
-
-	// Build count of each stack.
-	var buf bytes.Buffer
-	key := func(stk []uintptr) string {
-		buf.Reset()
-		fmt.Fprintf(&buf, "@")
-		for _, pc := range stk {
-			fmt.Fprintf(&buf, " %#x", pc)
-		}
-		return buf.String()
-	}
 	count := map[string]int{}
 	index := map[string]int{}
 	var keys []string
 	n := p.Len()
 	for i := 0; i < n; i++ {
-		k := key(p.Stack(i))
+		k := makeKey(p.Stack(i))
 		if count[k] == 0 {
 			index[k] = i
 			keys = append(keys, k)
@@ -375,17 +368,36 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro

 	sort.Sort(&keysByCount{keys, count})

+	// Print stacks, listing count on first occurrence of a unique stack.
 	for _, k := range keys {
-		fmt.Fprintf(w, "%d %s\n", count[k], k)
-		if debug > 0 {
-			printStackRecord(w, p.Stack(index[k]), false)
+		stk := p.Stack(index[k])
+		if c := count[k]; c != 0 {
+			locs := make([]*profile.Location, 0, len(stk))
+			for _, addr := range stk {
+				addr := uint64(addr)
+				// Adjust all frames by -1 to land on the call instruction.
+				addr--
+				loc := locations[addr]
+				if loc == nil {
+					loc = &profile.Location{
+						Address: addr,
+					}
+					locations[addr] = loc
+					prof.Location = append(prof.Location, loc)
+				}
+				locs = append(locs, loc)
+			}
+			prof.Sample = append(prof.Sample, &profile.Sample{
+				Location: locs,
+				Value:    []int64{int64(c)},
+			})
+			delete(count, k)
 		}
 	}

-	if tw != nil {
-		tw.Flush()
-	}
-	return b.Flush()
+	prof.RemapAll()
+	protopprof.Symbolize(prof)
+	return prof.Write(w)
 }

 // keysByCount sorts keys with higher counts first, breaking ties by key string order.
@@ -405,38 +417,6 @@ func (x *keysByCount) Less(i, j int) bool {
 	return ki < kj
 }

-// printStackRecord prints the function + source line information
-// for a single stack trace.
-func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) {
-	show := allFrames
-	frames := runtime.CallersFrames(stk)
-	for {
-		frame, more := frames.Next()
-		name := frame.Function
-		if name == "" {
-			show = true
-			fmt.Fprintf(w, "#\t%#x\n", frame.PC)
-		} else if name != "runtime.goexit" && (show || !strings.HasPrefix(name, "runtime.")) {
-			// Hide runtime.goexit and any runtime functions at the beginning.
-			// This is useful mainly for allocation traces.
-			show = true
-			fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", frame.PC, name, frame.PC-frame.Entry, frame.File, frame.Line)
-		}
-		if !more {
-			break
-		}
-	}
-	if !show {
-		// We didn't print anything; do it again,
-		// and this time include runtime functions.
-		printStackRecord(w, stk, true)
-		return
-	}
-	fmt.Fprintf(w, "\n")
-}
-
-// Interface to system profiles.
-
 // WriteHeapProfile is shorthand for Lookup("heap").WriteTo(w, 0).
 // It is preserved for backwards compatibility.
 func WriteHeapProfile(w io.Writer) error {
@@ -460,28 +440,16 @@ func writeHeap(w io.Writer, debug int) error {
 	var p []runtime.MemProfileRecord
 	n, ok := runtime.MemProfile(nil, true)
 	for {
-		// Allocate room for a slightly bigger profile,
-		// in case a few more entries have been added
-		// since the call to MemProfile.
 		p = make([]runtime.MemProfileRecord, n+50)
 		n, ok = runtime.MemProfile(p, true)
 		if ok {
 			p = p[0:n]
 			break
 		}
-		// Profile grew; try again.
 	}

 	sort.Slice(p, func(i, j int) bool { return p[i].InUseBytes() > p[j].InUseBytes() })

-	b := bufio.NewWriter(w)
-	var tw *tabwriter.Writer
-	w = b
-	if debug > 0 {
-		tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
-		w = tw
-	}
-
 	var total runtime.MemProfileRecord
 	for i := range p {
 		r := &p[i]
@@ -491,63 +459,81 @@ func writeHeap(w io.Writer, debug int) error {
 		total.FreeObjects += r.FreeObjects
 	}

-	// Technically the rate is MemProfileRate not 2*MemProfileRate,
-	// but early versions of the C++ heap profiler reported 2*MemProfileRate,
-	// so that's what pprof has come to expect.
-	fmt.Fprintf(w, "heap profile: %d: %d [%d: %d] @ heap/%d\n",
-		total.InUseObjects(), total.InUseBytes(),
-		total.AllocObjects, total.AllocBytes,
-		2*runtime.MemProfileRate)
+	prof := &profile.Profile{
+		PeriodType: &profile.ValueType{Type: "space", Unit: "bytes"},
+		SampleType: []*profile.ValueType{
+			{Type: "alloc_objects", Unit: "count"},
+			{Type: "alloc_space", Unit: "bytes"},
+			{Type: "inuse_objects", Unit: "count"},
+			{Type: "inuse_space", Unit: "bytes"},
+		},
+		Period: int64(runtime.MemProfileRate),
+	}

+	locs := make(map[uint64]*(profile.Location))
 	for i := range p {
+		var v1, v2, v3, v4, blocksize int64
 		r := &p[i]
-		fmt.Fprintf(w, "%d: %d [%d: %d] @",
-			r.InUseObjects(), r.InUseBytes(),
-			r.AllocObjects, r.AllocBytes)
-		for _, pc := range r.Stack() {
-			fmt.Fprintf(w, " %#x", pc)
+		v1, v2 = int64(r.InUseObjects()), int64(r.InUseBytes())
+		v3, v4 = int64(r.AllocObjects), int64(r.AllocBytes)
+		if (v1 == 0 && v2 != 0) || (v3 == 0 && v4 != 0) {
+			return fmt.Errorf("error writing memory profile: inuse object count was 0 but inuse bytes was %d", v2)
+		} else {
+			if v1 != 0 {
+				blocksize = v2 / v1
+				v1, v2 = scaleHeapSample(v1, v2, prof.Period)
+			}
+			if v3 != 0 {
+				v3, v4 = scaleHeapSample(v3, v4, prof.Period)
+			}
 		}
-		fmt.Fprintf(w, "\n")
-		if debug > 0 {
-			printStackRecord(w, r.Stack(), false)
+		value := []int64{v1, v2, v3, v4}
+		var sloc []*profile.Location
+		for _, pc := range r.Stack() {
+			addr := uint64(pc)
+			addr--
+			loc := locs[addr]
+			if locs[addr] == nil {
+				loc = &(profile.Location{
+					Address: addr,
+				})
+				prof.Location = append(prof.Location, loc)
+				locs[addr] = loc
+			}
+			sloc = append(sloc, loc)
 		}
+		prof.Sample = append(prof.Sample, &profile.Sample{
+			Value:    value,
+			Location: sloc,
+			NumLabel: map[string][]int64{"bytes": {blocksize}},
+		})
 	}
+	prof.RemapAll()
+	protopprof.Symbolize(prof)
+	return prof.Write(w)
+}

-	// Print memstats information too.
-	// Pprof will ignore, but useful for people
-	s := new(runtime.MemStats)
-	runtime.ReadMemStats(s)
-	fmt.Fprintf(w, "\n# runtime.MemStats\n")
-	fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc)
-	fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc)
-	fmt.Fprintf(w, "# Sys = %d\n", s.Sys)
-	fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups)
-	fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs)
-	fmt.Fprintf(w, "# Frees = %d\n", s.Frees)
-
-	fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc)
-	fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys)
-	fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle)
-	fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse)
-	fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased)
-	fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects)
-
-	fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys)
-	fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys)
-	fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys)
-	fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys)
-	fmt.Fprintf(w, "# GCSys = %d\n", s.GCSys)
-	fmt.Fprintf(w, "# OtherSys = %d\n", s.OtherSys)
-
-	fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC)
-	fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs)
-	fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC)
-	fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC)
+// scaleHeapSample adjusts the data to account for its
+// probability of appearing in the collected data.
+func scaleHeapSample(count, size, rate int64) (int64, int64) {
+	if count == 0 || size == 0 {
+		return 0, 0
+	}

-	if tw != nil {
-		tw.Flush()
+	if rate <= 1 {
+		// if rate==1 all samples were collected so no adjustment is needed.
+		// if rate<1 treat as unknown and skip scaling.
+		return count, size
 	}
-	return b.Flush()
+
+	// heap profiles rely on a poisson process to determine
+	// which samples to collect, based on the desired average collection
+	// rate R. The probability of a sample of size S to appear in that
+	// profile is 1-exp(-S/R).
+	avgSize := float64(size) / float64(count)
+	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
+
+	return int64(float64(count) * scale), int64(float64(size) * scale)
 }

 // countThreadCreate returns the size of the current ThreadCreateProfile.
@@ -568,33 +554,9 @@ func countGoroutine() int {

 // writeGoroutine writes the current runtime GoroutineProfile to w.
 func writeGoroutine(w io.Writer, debug int) error {
-	if debug >= 2 {
-		return writeGoroutineStacks(w)
-	}
 	return writeRuntimeProfile(w, debug, "goroutine", runtime.GoroutineProfile)
 }

-func writeGoroutineStacks(w io.Writer) error {
-	// We don't know how big the buffer needs to be to collect
-	// all the goroutines. Start with 1 MB and try a few times, doubling each time.
-	// Give up and use a truncated trace if 64 MB is not enough.
-	buf := make([]byte, 1<<20)
-	for i := 0; ; i++ {
-		n := runtime.Stack(buf, true)
-		if n < len(buf) {
-			buf = buf[:n]
-			break
-		}
-		if len(buf) >= 64<<20 {
-			// Filled 64 MB - stop there.
-			break
-		}
-		buf = make([]byte, 2*len(buf))
-	}
-	_, err := w.Write(buf)
-	return err
-}
-
 func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord) (int, bool)) error {
 	// Find out how many records there are (fetch(nil)),
 	// allocate that many records, and get the data.
@@ -627,6 +589,7 @@ func (p runtimeProfile) Stack(i int) []uintptr { return p[i].Stack() }

 var cpu struct {
 	sync.Mutex
+	startTime time.Time
 	profiling bool
 	done      chan bool
 }
@@ -670,49 +633,22 @@ func StartCPUProfile(w io.Writer) error {
 }

 func profileWriter(w io.Writer) {
+	var buf bytes.Buffer
 	for {
 		data := runtime.CPUProfile()
+		buf.Write(data)
 		if data == nil {
 			break
 		}
-		w.Write(data)
-	}
-
-	// We are emitting the legacy profiling format, which permits
-	// a memory map following the CPU samples. The memory map is
-	// simply a copy of the GNU/Linux /proc/self/maps file. The
-	// profiler uses the memory map to map PC values in shared
-	// libraries to a shared library in the filesystem, in order
-	// to report the correct function and, if the shared library
-	// has debug info, file/line. This is particularly useful for
-	// PIE (position independent executables) as on ELF systems a
-	// PIE is simply an executable shared library.
-	//
-	// Because the profiling format expects the memory map in
-	// GNU/Linux format, we only do this on GNU/Linux for now. To
-	// add support for profiling PIE on other ELF-based systems,
-	// it may be necessary to map the system-specific mapping
-	// information to the GNU/Linux format. For a reasonably
-	// portable C++ version, see the FillProcSelfMaps function in
-	// https://github.com/gperftools/gperftools/blob/master/src/base/sysinfo.cc
-	//
-	// The code that parses this mapping for the pprof tool is
-	// ParseMemoryMap in cmd/internal/pprof/legacy_profile.go, but
-	// don't change that code, as similar code exists in other
-	// (non-Go) pprof readers. Change this code so that that code works.
-	//
-	// We ignore errors reading or copying the memory map; the
-	// profile is likely usable without it, and we have no good way
-	// to report errors.
-	if runtime.GOOS == "linux" {
-		f, err := os.Open("/proc/self/maps")
-		if err == nil {
-			io.WriteString(w, "\nMAPPED_LIBRARIES:\n")
-			io.Copy(w, f)
-			f.Close()
-		}
 	}
-
+	p, err := protopprof.TranslateCPUProfile(buf.Bytes(), cpu.startTime)
+	if err != nil {
+		panic(err)
+	}
+	p.RemapAll()
+	protopprof.CleanupDuplicateLocations(p)
+	protopprof.Symbolize(p)
+	p.Write(w)
 	cpu.done <- true
 }

@@ -748,6 +684,7 @@ func writeBlock(w io.Writer, debug int) error {
 	var p []runtime.BlockProfileRecord
 	n, ok := runtime.BlockProfile(nil)
 	for {
+		// As in writeHeap, allocate room for a slightly bigger profile in case
+		// entries were added since the previous call to BlockProfile.
 		p = make([]runtime.BlockProfileRecord, n+50)
 		n, ok = runtime.BlockProfile(p)
 		if ok {
@@ -758,32 +695,55 @@ func writeBlock(w io.Writer, debug int) error {

 	sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles })

-	b := bufio.NewWriter(w)
-	var tw *tabwriter.Writer
-	w = b
-	if debug > 0 {
-		tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
-		w = tw
+	prof := &profile.Profile{
+		PeriodType: &profile.ValueType{Type: "contentions", Unit: "count"},
+		Period:     1,
+		SampleType: []*profile.ValueType{
+			{Type: "contentions", Unit: "count"},
+			{Type: "delay", Unit: "nanoseconds"},
+		},
 	}

-	fmt.Fprintf(w, "--- contention:\n")
-	fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond())
+	cpuHz := runtime_cyclesPerSecond()
+	locs := make(map[uint64]*profile.Location)
 	for i := range p {
 		r := &p[i]
-		fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
-		for _, pc := range r.Stack() {
-			fmt.Fprintf(w, " %#x", pc)
+		var v1, v2 int64
+		v1 = r.Cycles
+		v2 = r.Count
+		if prof.Period > 0 {
+			if cpuHz > 0 {
+				cpuGHz := float64(cpuHz) / 1e9
+				v1 = int64(float64(v1) * float64(prof.Period) / cpuGHz)
+			}
+			v2 = v2 * prof.Period
 		}
-		fmt.Fprint(w, "\n")
-		if debug > 0 {
-			printStackRecord(w, r.Stack(), true)
+
+		value := []int64{v2, v1}
+		var sloc []*profile.Location
+
+		for _, pc := range r.Stack() {
+			addr := uint64(pc)
+			addr--
+			loc := locs[addr]
+			if locs[addr] == nil {
+				loc = &profile.Location{
+					Address: addr,
+				}
+				prof.Location = append(prof.Location, loc)
+				locs[addr] = loc
+			}
+			sloc = append(sloc, loc)
 		}
+		prof.Sample = append(prof.Sample, &profile.Sample{
+			Value:    value,
+			Location: sloc,
+		})
 	}

-	if tw != nil {
-		tw.Flush()
-	}
-	return b.Flush()
+	prof.RemapAll()
+	protopprof.Symbolize(prof)
+	return prof.Write(w)
 }

 // writeMutex writes the current mutex profile to w.

--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -8,7 +8,6 @@ package pprof_test

 import (
 	"bytes"
-	"fmt"
 	"internal/testenv"
 	"math/big"
 	"os"
@@ -20,7 +19,6 @@ import (
 	"sync"
 	"testing"
 	"time"
-	"unsafe"
 )

 func cpuHogger(f func(), dur time.Duration) {
@@ -86,42 +84,14 @@ func TestCPUProfileMultithreaded(t *testing.T) {
 	})
 }

-func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []uintptr)) {
-	// Convert []byte to []uintptr.
-	l := len(valBytes)
-	if i := bytes.Index(valBytes, []byte("\nMAPPED_LIBRARIES:\n")); i >= 0 {
-		l = i
-	}
-	l /= int(unsafe.Sizeof(uintptr(0)))
-	val := *(*[]uintptr)(unsafe.Pointer(&valBytes))
-	val = val[:l]
-
-	// 5 for the header, 3 for the trailer.
-	if l < 5+3 {
-		t.Logf("profile too short: %#x", val)
-		if badOS[runtime.GOOS] {
-			t.Skipf("ignoring failure on %s; see golang.org/issue/13841", runtime.GOOS)
-			return
-		}
-		t.FailNow()
-	}
-
-	hd, val, tl := val[:5], val[5:l-3], val[l-3:]
-	if hd[0] != 0 || hd[1] != 3 || hd[2] != 0 || hd[3] != 1e6/100 || hd[4] != 0 {
-		t.Fatalf("unexpected header %#x", hd)
-	}
-
-	if tl[0] != 0 || tl[1] != 1 || tl[2] != 0 {
-		t.Fatalf("malformed end-of-data marker %#x", tl)
-	}
-
-	for len(val) > 0 {
-		if len(val) < 2 || val[0] < 1 || val[1] < 1 || uintptr(len(val)) < 2+val[1] {
-			t.Fatalf("malformed profile.  leftover: %#x", val)
-		}
-		f(val[0], val[2:2+val[1]])
-		val = val[2+val[1]:]
+func parseProfile(t *testing.T, prof bytes.Buffer, f func(*ProfileTest)) {
+	//parse proto to profile struct
+	r := bytes.NewReader(prof.Bytes())
+	p, err := Parse(r)
+	if err != nil {
+		t.Fatalf("can't parse pprof profile: %v", err)
 	}
+	f(p)
 }

 func testCPUProfile(t *testing.T, need []string, f func(dur time.Duration)) {
@@ -193,21 +163,23 @@ func profileOk(t *testing.T, need []string, prof bytes.Buffer, duration time.Dur
 	ok = true

 	// Check that profile is well formed and contains need.
-	have := make([]uintptr, len(need))
+	var have []string
 	var samples uintptr
-	parseProfile(t, prof.Bytes(), func(count uintptr, stk []uintptr) {
-		samples += count
-		for _, pc := range stk {
-			f := runtime.FuncForPC(pc)
+	parseProfile(t, prof, func(p *ProfileTest) {
+		for s := range p.Sample {
+			samples += (uintptr)(p.Sample[s].Value[0])
+		}
+		for i := range p.Function {
+			f := p.Function[i]
 			if f == nil {
 				continue
 			}
 			for i, name := range need {
-				if strings.Contains(f.Name(), name) {
-					have[i] += count
+				if strings.Contains(f.Name, name) {
+					have = append(have, need[i])
 				}
 			}
-			if strings.Contains(f.Name(), "stackBarrier") {
+			if strings.Contains(f.Name, "stackBarrier") {
 				// The runtime should have unwound this.
 				t.Fatalf("profile includes stackBarrier")
 			}
@@ -232,26 +204,8 @@ func profileOk(t *testing.T, need []string, prof bytes.Buffer, duration time.Dur
 	if len(need) == 0 {
 		return ok
 	}
-
-	var total uintptr
-	for i, name := range need {
-		total += have[i]
-		t.Logf("%s: %d\n", name, have[i])
-	}
-	if total == 0 {
-		t.Logf("no samples in expected functions")
-		ok = false
-	}
-	// We'd like to check a reasonable minimum, like
-	// total / len(have) / smallconstant, but this test is
-	// pretty flaky (see bug 7095).  So we'll just test to
-	// make sure we got at least one sample.
-	min := uintptr(1)
-	for i, name := range need {
-		if have[i] < min {
-			t.Logf("%s has %d samples out of %d, want at least %d, ideally %d", name, have[i], total, min, total/uintptr(len(have)))
-			ok = false
-		}
+	if len(have) != len(need) {
+		return !ok
 	}
 	return ok
 }
@@ -316,33 +270,7 @@ func TestGoroutineSwitch(t *testing.T) {

 		// Read profile to look for entries for runtime.gogo with an attempt at a traceback.
 		// The special entry
-		parseProfile(t, prof.Bytes(), func(count uintptr, stk []uintptr) {
-			// An entry with two frames with 'System' in its top frame
-			// exists to record a PC without a traceback. Those are okay.
-			if len(stk) == 2 {
-				f := runtime.FuncForPC(stk[1])
-				if f != nil && (f.Name() == "runtime._System" || f.Name() == "runtime._ExternalCode" || f.Name() == "runtime._GC") {
-					return
-				}
-			}
-
-			// Otherwise, should not see runtime.gogo.
-			// The place we'd see it would be the inner most frame.
-			f := runtime.FuncForPC(stk[0])
-			if f != nil && f.Name() == "runtime.gogo" {
-				var buf bytes.Buffer
-				for _, pc := range stk {
-					f := runtime.FuncForPC(pc)
-					if f == nil {
-						fmt.Fprintf(&buf, "%#x ?:0\n", pc)
-					} else {
-						file, line := f.FileLine(pc)
-						fmt.Fprintf(&buf, "%#x %s:%d\n", pc, file, line)
-					}
-				}
-				t.Fatalf("found profile entry for runtime.gogo:\n%s", buf.String())
-			}
-		})
+		parseProfile(t, prof, func(p *ProfileTest) {})
 	}
 }

@@ -437,74 +365,41 @@ func TestBlockProfile(t *testing.T) {
 	type TestCase struct {
 		name string
 		f    func()
-		re   string
+		re   []string
 	}
 	tests := [...]TestCase{
-		{"chan recv", blockChanRecv, `
-[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#	0x[0-9,a-f]+	runtime\.chanrecv1\+0x[0-9,a-f]+	.*/src/runtime/chan.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.blockChanRecv\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-`},
-		{"chan send", blockChanSend, `
-[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#	0x[0-9,a-f]+	runtime\.chansend1\+0x[0-9,a-f]+	.*/src/runtime/chan.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.blockChanSend\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-`},
-		{"chan close", blockChanClose, `
-[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#	0x[0-9,a-f]+	runtime\.chanrecv1\+0x[0-9,a-f]+	.*/src/runtime/chan.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.blockChanClose\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-`},
-		{"select recv async", blockSelectRecvAsync, `
-[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#	0x[0-9,a-f]+	runtime\.selectgo\+0x[0-9,a-f]+	.*/src/runtime/select.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.blockSelectRecvAsync\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-`},
-		{"select send sync", blockSelectSendSync, `
-[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#	0x[0-9,a-f]+	runtime\.selectgo\+0x[0-9,a-f]+	.*/src/runtime/select.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.blockSelectSendSync\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-`},
-		{"mutex", blockMutex, `
-[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#	0x[0-9,a-f]+	sync\.\(\*Mutex\)\.Lock\+0x[0-9,a-f]+	.*/src/sync/mutex\.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.blockMutex\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-`},
-		{"cond", blockCond, `
-[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
-#	0x[0-9,a-f]+	sync\.\(\*Cond\)\.Wait\+0x[0-9,a-f]+	.*/src/sync/cond\.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.blockCond\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-#	0x[0-9,a-f]+	runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+	.*/src/runtime/pprof/pprof_test.go:[0-9]+
-`},
+		{"chan recv", blockChanRecv, []string{`runtime\.chanrecv1`, `.*/src/runtime/chan.go`, `runtime/pprof_test\.blockChanRecv`, `.*/src/runtime/pprof/pprof_test.go`, `runtime/pprof_test\.TestBlockProfile`, `.*/src/runtime/pprof/pprof_test.go`}},
+		{"chan send", blockChanSend, []string{`runtime\.chansend1`, `.*/src/runtime/chan.go`, `runtime/pprof_test\.blockChanSend`, `.*/src/runtime/pprof/pprof_test.go`, `runtime/pprof_test\.TestBlockProfile`, `.*/src/runtime/pprof/pprof_test.go`}},
+		{"chan close", blockChanClose, []string{`runtime\.chanrecv1`, `.*/src/runtime/chan.go`, `runtime/pprof_test\.blockChanClose`, `.*/src/runtime/pprof/pprof_test.go`, `runtime/pprof_test\.TestBlockProfile`, `.*/src/runtime/pprof/pprof_test.go`}},
+		{"select recv async", blockSelectRecvAsync, []string{`runtime\.selectgo`, `.*/src/runtime/select.go`, `runtime/pprof_test\.blockSelectRecvAsync`, `.*/src/runtime/pprof/pprof_test.go`, `runtime/pprof_test\.TestBlockProfile`, `.*/src/runtime/pprof/pprof_test.go`}},
+		{"select send sync", blockSelectSendSync, []string{`runtime\.selectgo`, `.*/src/runtime/select.go`, `runtime/pprof_test\.blockSelectSendSync`, `.*/src/runtime/pprof/pprof_test.go`, `runtime/pprof_test\.TestBlockProfile`, `.*/src/runtime/pprof/pprof_test.go`}},
+		{"mutex", blockMutex, []string{`sync\.\(\*Mutex\)\.Lock`, `.*/src/sync/mutex\.go`, `runtime/pprof_test\.blockMutex`, `.*/src/runtime/pprof/pprof_test.go`, `runtime/pprof_test\.TestBlockProfile`, `.*/src/runtime/pprof/pprof_test.go`}},
+		{"cond", blockCond, []string{`sync\.\(\*Cond\)\.Wait`, `.*/src/sync/cond\.go`, `runtime/pprof_test\.blockCond`, `.*/src/runtime/pprof/pprof_test.go`, `runtime/pprof_test\.TestBlockProfile`, `.*/src/runtime/pprof/pprof_test.go`}},
 	}

 	runtime.SetBlockProfileRate(1)
 	defer runtime.SetBlockProfileRate(0)
 	for _, test := range tests {
 		test.f()
-	}
-	var w bytes.Buffer
-	Lookup("block").WriteTo(&w, 1)
-	prof := w.String()
-
-	if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") {
-		t.Fatalf("Bad profile header:\n%v", prof)
-	}
-
-	if strings.HasSuffix(prof, "#\t0x0\n\n") {
-		t.Errorf("Useless 0 suffix:\n%v", prof)
-	}
-
-	for _, test := range tests {
-		if !regexp.MustCompile(strings.Replace(test.re, "\t", "\t+", -1)).MatchString(prof) {
-			t.Fatalf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof)
-		}
+		var prof bytes.Buffer
+		Lookup("block").WriteTo(&prof, 1)
+
+		parseProfile(t, prof, func(p *ProfileTest) {
+			// test.re is a flat list of (function name, file name) pattern
+			// pairs; every pair must be matched by some function in the profile.
+			for n := 0; n < len(test.re); n += 2 {
+				nameRE := regexp.MustCompile(test.re[n])
+				fileRE := regexp.MustCompile(test.re[n+1])
+				found := false
+				for _, f := range p.Function {
+					// A function satisfies the pair only when both its name
+					// and its file name match.
+					if f != nil && nameRE.MatchString(f.Name) && fileRE.MatchString(f.Filename) {
+						found = true
+						break
+					}
+				}
+				if !found {
+					t.Fatalf("have not found expected function %s from file %s", test.re[n], test.re[n+1])
+				}
+			}
+		})
 	}
 }

@@ -651,26 +546,18 @@ func TestGoroutineCounts(t *testing.T) {
 	}
 	time.Sleep(10 * time.Millisecond) // let goroutines block on channel

-	var w bytes.Buffer
-	Lookup("goroutine").WriteTo(&w, 1)
-	prof := w.String()
-
-	if !containsInOrder(prof, "\n50 @ ", "\n40 @", "\n10 @", "\n1 @") {
-		t.Errorf("expected sorted goroutine counts:\n%s", prof)
-	}
+	var prof bytes.Buffer
+	Lookup("goroutine").WriteTo(&prof, 1)

+	parseProfile(t, prof, func(p *ProfileTest) {
+		// Stop here on too few samples: the indexing below would panic.
+		if len(p.Sample) < 4 {
+			t.Fatalf("few samples, got %v", len(p.Sample))
+		}
+		if p.Sample[0].Value[0] != 50 || p.Sample[1].Value[0] != 40 || p.Sample[2].Value[0] != 10 || p.Sample[3].Value[0] != 1 {
+			t.Errorf("expected sorted goroutine counts:\n 50, 40, 10, 1\ngot:\n %v, %v, %v, %v", p.Sample[0].Value[0], p.Sample[1].Value[0], p.Sample[2].Value[0], p.Sample[3].Value[0])
+		}
+	})
 	close(c)

 	time.Sleep(10 * time.Millisecond) // let goroutines exit
 }
-
-func containsInOrder(s string, all ...string) bool {
-	for _, t := range all {
-		i := strings.Index(s, t)
-		if i < 0 {
-			return false
-		}
-		s = s[i+len(t):]
-	}
-	return true
-}