Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
golang / usr / local / go / src / internal / trace / summary.go
Size: Mime:
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package trace

import (
	tracev2 "internal/trace/v2"
	"sort"
	"time"
)

// Summary is the analysis result produced by the summarizer.
type Summary struct {
	Goroutines map[tracev2.GoID]*GoroutineSummary
	Tasks      map[tracev2.TaskID]*UserTaskSummary
}

// GoroutineSummary contains statistics and execution details of a single goroutine.
// (For v2 traces.)
type GoroutineSummary struct {
	ID           tracev2.GoID
	Name         string       // A non-unique human-friendly identifier for the goroutine.
	PC           uint64       // The start PC of the goroutine.
	CreationTime tracev2.Time // Timestamp of the first appearance in the trace.
	StartTime    tracev2.Time // Timestamp of the first time it started running. 0 if the goroutine never ran.
	EndTime      tracev2.Time // Timestamp of when the goroutine exited. 0 if the goroutine never exited.

	// List of regions in the goroutine, sorted based on the start time.
	Regions []*UserRegionSummary

	// Statistics of execution time during the goroutine execution.
	GoroutineExecStats

	// goroutineSummary is state used just for computing this structure.
	// It's dropped before being returned to the caller.
	//
	// More specifically, if it's nil, it indicates that this summary has
	// already been finalized.
	*goroutineSummary
}

// UserTaskSummary represents a task in the trace.
type UserTaskSummary struct {
	ID       tracev2.TaskID
	Name     string
	Parent   *UserTaskSummary // nil if the parent is unknown.
	Children []*UserTaskSummary

	// Task begin event. An EventTaskBegin event or nil.
	Start *tracev2.Event

	// End end event. Normally EventTaskEnd event or nil.
	End *tracev2.Event

	// Logs is a list of tracev2.EventLog events associated with the task.
	Logs []*tracev2.Event

	// List of regions in the task, sorted based on the start time.
	Regions []*UserRegionSummary

	// Goroutines is the set of goroutines associated with this task.
	Goroutines map[tracev2.GoID]*GoroutineSummary
}

// Complete returns true if we have complete information about the task
// from the trace: both a start and an end.
func (s *UserTaskSummary) Complete() bool {
	return s.Start != nil && s.End != nil
}

// Descendents returns a slice consisting of itself (always the first task returned),
// and the transitive closure of all of its children.
func (s *UserTaskSummary) Descendents() []*UserTaskSummary {
	descendents := []*UserTaskSummary{s}
	for _, child := range s.Children {
		descendents = append(descendents, child.Descendents()...)
	}
	return descendents
}

// UserRegionSummary represents a region and goroutine execution stats
// while the region was active. (For v2 traces.)
type UserRegionSummary struct {
	TaskID tracev2.TaskID
	Name   string

	// Region start event. Normally EventRegionBegin event or nil,
	// but can be a state transition event from NotExist or Undetermined
	// if the region is a synthetic region representing task inheritance
	// from the parent goroutine.
	Start *tracev2.Event

	// Region end event. Normally EventRegionEnd event or nil,
	// but can be a state transition event to NotExist if the goroutine
	// terminated without explicitly ending the region.
	End *tracev2.Event

	GoroutineExecStats
}

// GoroutineExecStats contains statistics about a goroutine's execution
// during a period of time.
type GoroutineExecStats struct {
	// These stats are all non-overlapping.
	ExecTime          time.Duration
	SchedWaitTime     time.Duration
	BlockTimeByReason map[string]time.Duration
	SyscallTime       time.Duration
	SyscallBlockTime  time.Duration

	// TotalTime is the duration of the goroutine's presence in the trace.
	// Necessarily overlaps with other stats.
	TotalTime time.Duration

	// Total time the goroutine spent in certain ranges; may overlap
	// with other stats.
	RangeTime map[string]time.Duration
}

func (s GoroutineExecStats) NonOverlappingStats() map[string]time.Duration {
	stats := map[string]time.Duration{
		"Execution time":         s.ExecTime,
		"Sched wait time":        s.SchedWaitTime,
		"Syscall execution time": s.SyscallTime,
		"Block time (syscall)":   s.SyscallBlockTime,
		"Unknown time":           s.UnknownTime(),
	}
	for reason, dt := range s.BlockTimeByReason {
		stats["Block time ("+reason+")"] += dt
	}
	// N.B. Don't include RangeTime or TotalTime; they overlap with these other
	// stats.
	return stats
}

// UnknownTime returns whatever isn't accounted for in TotalTime.
func (s GoroutineExecStats) UnknownTime() time.Duration {
	sum := s.ExecTime + s.SchedWaitTime + s.SyscallTime +
		s.SyscallBlockTime
	for _, dt := range s.BlockTimeByReason {
		sum += dt
	}
	// N.B. Don't include range time. Ranges overlap with
	// other stats, whereas these stats are non-overlapping.
	if sum < s.TotalTime {
		return s.TotalTime - sum
	}
	return 0
}

// sub returns the stats v-s.
func (s GoroutineExecStats) sub(v GoroutineExecStats) (r GoroutineExecStats) {
	r = s.clone()
	r.ExecTime -= v.ExecTime
	r.SchedWaitTime -= v.SchedWaitTime
	for reason := range s.BlockTimeByReason {
		r.BlockTimeByReason[reason] -= v.BlockTimeByReason[reason]
	}
	r.SyscallTime -= v.SyscallTime
	r.SyscallBlockTime -= v.SyscallBlockTime
	r.TotalTime -= v.TotalTime
	for name := range s.RangeTime {
		r.RangeTime[name] -= v.RangeTime[name]
	}
	return r
}

func (s GoroutineExecStats) clone() (r GoroutineExecStats) {
	r = s
	r.BlockTimeByReason = make(map[string]time.Duration)
	for reason, dt := range s.BlockTimeByReason {
		r.BlockTimeByReason[reason] = dt
	}
	r.RangeTime = make(map[string]time.Duration)
	for name, dt := range s.RangeTime {
		r.RangeTime[name] = dt
	}
	return r
}

// snapshotStat returns the snapshot of the goroutine execution statistics.
// This is called as we process the ordered trace event stream. lastTs is used
// to process pending statistics if this is called before any goroutine end event.
func (g *GoroutineSummary) snapshotStat(lastTs tracev2.Time) (ret GoroutineExecStats) {
	ret = g.GoroutineExecStats.clone()

	if g.goroutineSummary == nil {
		return ret // Already finalized; no pending state.
	}

	// Set the total time if necessary.
	if g.TotalTime == 0 {
		ret.TotalTime = lastTs.Sub(g.CreationTime)
	}

	// Add in time since lastTs.
	if g.lastStartTime != 0 {
		ret.ExecTime += lastTs.Sub(g.lastStartTime)
	}
	if g.lastRunnableTime != 0 {
		ret.SchedWaitTime += lastTs.Sub(g.lastRunnableTime)
	}
	if g.lastBlockTime != 0 {
		ret.BlockTimeByReason[g.lastBlockReason] += lastTs.Sub(g.lastBlockTime)
	}
	if g.lastSyscallTime != 0 {
		ret.SyscallTime += lastTs.Sub(g.lastSyscallTime)
	}
	if g.lastSyscallBlockTime != 0 {
		ret.SchedWaitTime += lastTs.Sub(g.lastSyscallBlockTime)
	}
	for name, ts := range g.lastRangeTime {
		ret.RangeTime[name] += lastTs.Sub(ts)
	}
	return ret
}

// finalize is called when processing a goroutine end event or at
// the end of trace processing. This finalizes the execution stat
// and any active regions in the goroutine, in which case trigger is nil.
func (g *GoroutineSummary) finalize(lastTs tracev2.Time, trigger *tracev2.Event) {
	if trigger != nil {
		g.EndTime = trigger.Time()
	}
	finalStat := g.snapshotStat(lastTs)

	g.GoroutineExecStats = finalStat

	// System goroutines are never part of regions, even though they
	// "inherit" a task due to creation (EvGoCreate) from within a region.
	// This may happen e.g. if the first GC is triggered within a region,
	// starting the GC worker goroutines.
	if !IsSystemGoroutine(g.Name) {
		for _, s := range g.activeRegions {
			s.End = trigger
			s.GoroutineExecStats = finalStat.sub(s.GoroutineExecStats)
			g.Regions = append(g.Regions, s)
		}
	}
	*(g.goroutineSummary) = goroutineSummary{}
}

// goroutineSummary is a private part of GoroutineSummary that is required only during analysis.
type goroutineSummary struct {
	lastStartTime        tracev2.Time
	lastRunnableTime     tracev2.Time
	lastBlockTime        tracev2.Time
	lastBlockReason      string
	lastSyscallTime      tracev2.Time
	lastSyscallBlockTime tracev2.Time
	lastRangeTime        map[string]tracev2.Time
	activeRegions        []*UserRegionSummary // stack of active regions
}

// Summarizer constructs per-goroutine time statistics for v2 traces.
type Summarizer struct {
	// gs contains the map of goroutine summaries we're building up to return to the caller.
	gs map[tracev2.GoID]*GoroutineSummary

	// tasks contains the map of task summaries we're building up to return to the caller.
	tasks map[tracev2.TaskID]*UserTaskSummary

	// syscallingP and syscallingG represent a binding between a P and G in a syscall.
	// Used to correctly identify and clean up after syscalls (blocking or otherwise).
	syscallingP map[tracev2.ProcID]tracev2.GoID
	syscallingG map[tracev2.GoID]tracev2.ProcID

	// rangesP is used for optimistic tracking of P-based ranges for goroutines.
	//
	// It's a best-effort mapping of an active range on a P to the goroutine we think
	// is associated with it.
	rangesP map[rangeP]tracev2.GoID

	lastTs tracev2.Time // timestamp of the last event processed.
	syncTs tracev2.Time // timestamp of the last sync event processed (or the first timestamp in the trace).
}

// NewSummarizer creates a new struct to build goroutine stats from a trace.
func NewSummarizer() *Summarizer {
	return &Summarizer{
		gs:          make(map[tracev2.GoID]*GoroutineSummary),
		tasks:       make(map[tracev2.TaskID]*UserTaskSummary),
		syscallingP: make(map[tracev2.ProcID]tracev2.GoID),
		syscallingG: make(map[tracev2.GoID]tracev2.ProcID),
		rangesP:     make(map[rangeP]tracev2.GoID),
	}
}

type rangeP struct {
	id   tracev2.ProcID
	name string
}

// Event feeds a single event into the stats summarizer.
func (s *Summarizer) Event(ev *tracev2.Event) {
	if s.syncTs == 0 {
		s.syncTs = ev.Time()
	}
	s.lastTs = ev.Time()

	switch ev.Kind() {
	// Record sync time for the RangeActive events.
	case tracev2.EventSync:
		s.syncTs = ev.Time()

	// Handle state transitions.
	case tracev2.EventStateTransition:
		st := ev.StateTransition()
		switch st.Resource.Kind {
		// Handle goroutine transitions, which are the meat of this computation.
		case tracev2.ResourceGoroutine:
			id := st.Resource.Goroutine()
			old, new := st.Goroutine()
			if old == new {
				// Skip these events; they're not telling us anything new.
				break
			}

			// Handle transition out.
			g := s.gs[id]
			switch old {
			case tracev2.GoUndetermined, tracev2.GoNotExist:
				g = &GoroutineSummary{ID: id, goroutineSummary: &goroutineSummary{}}
				// If we're coming out of GoUndetermined, then the creation time is the
				// time of the last sync.
				if old == tracev2.GoUndetermined {
					g.CreationTime = s.syncTs
				} else {
					g.CreationTime = ev.Time()
				}
				// The goroutine is being created, or it's being named for the first time.
				g.lastRangeTime = make(map[string]tracev2.Time)
				g.BlockTimeByReason = make(map[string]time.Duration)
				g.RangeTime = make(map[string]time.Duration)

				// When a goroutine is newly created, inherit the task
				// of the active region. For ease handling of this
				// case, we create a fake region description with the
				// task id. This isn't strictly necessary as this
				// goroutine may not be associated with the task, but
				// it can be convenient to see all children created
				// during a region.
				//
				// N.B. ev.Goroutine() will always be NoGoroutine for the
				// Undetermined case, so this is will simply not fire.
				if creatorG := s.gs[ev.Goroutine()]; creatorG != nil && len(creatorG.activeRegions) > 0 {
					regions := creatorG.activeRegions
					s := regions[len(regions)-1]
					g.activeRegions = []*UserRegionSummary{{TaskID: s.TaskID, Start: ev}}
				}
				s.gs[g.ID] = g
			case tracev2.GoRunning:
				// Record execution time as we transition out of running
				g.ExecTime += ev.Time().Sub(g.lastStartTime)
				g.lastStartTime = 0
			case tracev2.GoWaiting:
				// Record block time as we transition out of waiting.
				if g.lastBlockTime != 0 {
					g.BlockTimeByReason[g.lastBlockReason] += ev.Time().Sub(g.lastBlockTime)
					g.lastBlockTime = 0
				}
			case tracev2.GoRunnable:
				// Record sched latency time as we transition out of runnable.
				if g.lastRunnableTime != 0 {
					g.SchedWaitTime += ev.Time().Sub(g.lastRunnableTime)
					g.lastRunnableTime = 0
				}
			case tracev2.GoSyscall:
				// Record syscall execution time and syscall block time as we transition out of syscall.
				if g.lastSyscallTime != 0 {
					if g.lastSyscallBlockTime != 0 {
						g.SyscallBlockTime += ev.Time().Sub(g.lastSyscallBlockTime)
						g.SyscallTime += g.lastSyscallBlockTime.Sub(g.lastSyscallTime)
					} else {
						g.SyscallTime += ev.Time().Sub(g.lastSyscallTime)
					}
					g.lastSyscallTime = 0
					g.lastSyscallBlockTime = 0

					// Clear the syscall map.
					delete(s.syscallingP, s.syscallingG[id])
					delete(s.syscallingG, id)
				}
			}

			// The goroutine hasn't been identified yet. Take the transition stack
			// and identify the goroutine by the bottom-most frame of that stack.
			// This bottom-most frame will be identical for all transitions on this
			// goroutine, because it represents its immutable start point.
			if g.PC == 0 {
				stk := st.Stack
				if stk != tracev2.NoStack {
					var frame tracev2.StackFrame
					var ok bool
					stk.Frames(func(f tracev2.StackFrame) bool {
						frame = f
						ok = true
						return false
					})
					if ok {
						g.PC = frame.PC
						g.Name = frame.Func
					}
				}
			}

			// Handle transition in.
			switch new {
			case tracev2.GoRunning:
				// We started running. Record it.
				g.lastStartTime = ev.Time()
				if g.StartTime == 0 {
					g.StartTime = ev.Time()
				}
			case tracev2.GoRunnable:
				g.lastRunnableTime = ev.Time()
			case tracev2.GoWaiting:
				if st.Reason != "forever" {
					g.lastBlockTime = ev.Time()
					g.lastBlockReason = st.Reason
					break
				}
				// "Forever" is like goroutine death.
				fallthrough
			case tracev2.GoNotExist:
				g.finalize(ev.Time(), ev)
			case tracev2.GoSyscall:
				s.syscallingP[ev.Proc()] = id
				s.syscallingG[id] = ev.Proc()
				g.lastSyscallTime = ev.Time()
			}

		// Handle procs to detect syscall blocking, which si identifiable as a
		// proc going idle while the goroutine it was attached to is in a syscall.
		case tracev2.ResourceProc:
			id := st.Resource.Proc()
			old, new := st.Proc()
			if old != new && new == tracev2.ProcIdle {
				if goid, ok := s.syscallingP[id]; ok {
					g := s.gs[goid]
					g.lastSyscallBlockTime = ev.Time()
					delete(s.syscallingP, id)
				}
			}
		}

	// Handle ranges of all kinds.
	case tracev2.EventRangeBegin, tracev2.EventRangeActive:
		r := ev.Range()
		var g *GoroutineSummary
		switch r.Scope.Kind {
		case tracev2.ResourceGoroutine:
			// Simple goroutine range. We attribute the entire range regardless of
			// goroutine stats. Lots of situations are still identifiable, e.g. a
			// goroutine blocked often in mark assist will have both high mark assist
			// and high block times. Those interested in a deeper view can look at the
			// trace viewer.
			g = s.gs[r.Scope.Goroutine()]
		case tracev2.ResourceProc:
			// N.B. These ranges are not actually bound to the goroutine, they're
			// bound to the P. But if we happen to be on the P the whole time, let's
			// try to attribute it to the goroutine. (e.g. GC sweeps are here.)
			g = s.gs[ev.Goroutine()]
			if g != nil {
				s.rangesP[rangeP{id: r.Scope.Proc(), name: r.Name}] = ev.Goroutine()
			}
		}
		if g == nil {
			break
		}
		if ev.Kind() == tracev2.EventRangeActive {
			if ts := g.lastRangeTime[r.Name]; ts != 0 {
				g.RangeTime[r.Name] += s.syncTs.Sub(ts)
			}
			g.lastRangeTime[r.Name] = s.syncTs
		} else {
			g.lastRangeTime[r.Name] = ev.Time()
		}
	case tracev2.EventRangeEnd:
		r := ev.Range()
		var g *GoroutineSummary
		switch r.Scope.Kind {
		case tracev2.ResourceGoroutine:
			g = s.gs[r.Scope.Goroutine()]
		case tracev2.ResourceProc:
			rp := rangeP{id: r.Scope.Proc(), name: r.Name}
			if goid, ok := s.rangesP[rp]; ok {
				if goid == ev.Goroutine() {
					// As the comment in the RangeBegin case states, this is only OK
					// if we finish on the same goroutine we started on.
					g = s.gs[goid]
				}
				delete(s.rangesP, rp)
			}
		}
		if g == nil {
			break
		}
		ts := g.lastRangeTime[r.Name]
		if ts == 0 {
			break
		}
		g.RangeTime[r.Name] += ev.Time().Sub(ts)
		delete(g.lastRangeTime, r.Name)

	// Handle user-defined regions.
	case tracev2.EventRegionBegin:
		g := s.gs[ev.Goroutine()]
		r := ev.Region()
		region := &UserRegionSummary{
			Name:               r.Type,
			TaskID:             r.Task,
			Start:              ev,
			GoroutineExecStats: g.snapshotStat(ev.Time()),
		}
		g.activeRegions = append(g.activeRegions, region)
		// Associate the region and current goroutine to the task.
		task := s.getOrAddTask(r.Task)
		task.Regions = append(task.Regions, region)
		task.Goroutines[g.ID] = g
	case tracev2.EventRegionEnd:
		g := s.gs[ev.Goroutine()]
		r := ev.Region()
		var sd *UserRegionSummary
		if regionStk := g.activeRegions; len(regionStk) > 0 {
			// Pop the top region from the stack since that's what must have ended.
			n := len(regionStk)
			sd = regionStk[n-1]
			regionStk = regionStk[:n-1]
			g.activeRegions = regionStk
			// N.B. No need to add the region to a task; the EventRegionBegin already handled it.
		} else {
			// This is an "end" without a start. Just fabricate the region now.
			sd = &UserRegionSummary{Name: r.Type, TaskID: r.Task}
			// Associate the region and current goroutine to the task.
			task := s.getOrAddTask(r.Task)
			task.Goroutines[g.ID] = g
			task.Regions = append(task.Regions, sd)
		}
		sd.GoroutineExecStats = g.snapshotStat(ev.Time()).sub(sd.GoroutineExecStats)
		sd.End = ev
		g.Regions = append(g.Regions, sd)

	// Handle tasks and logs.
	case tracev2.EventTaskBegin, tracev2.EventTaskEnd:
		// Initialize the task.
		t := ev.Task()
		task := s.getOrAddTask(t.ID)
		task.Name = t.Type
		task.Goroutines[ev.Goroutine()] = s.gs[ev.Goroutine()]
		if ev.Kind() == tracev2.EventTaskBegin {
			task.Start = ev
		} else {
			task.End = ev
		}
		// Initialize the parent, if one exists and it hasn't been done yet.
		// We need to avoid doing it twice, otherwise we could appear twice
		// in the parent's Children list.
		if t.Parent != tracev2.NoTask && task.Parent == nil {
			parent := s.getOrAddTask(t.Parent)
			task.Parent = parent
			parent.Children = append(parent.Children, task)
		}
	case tracev2.EventLog:
		log := ev.Log()
		// Just add the log to the task. We'll create the task if it
		// doesn't exist (it's just been mentioned now).
		task := s.getOrAddTask(log.Task)
		task.Goroutines[ev.Goroutine()] = s.gs[ev.Goroutine()]
		task.Logs = append(task.Logs, ev)
	}
}

func (s *Summarizer) getOrAddTask(id tracev2.TaskID) *UserTaskSummary {
	task := s.tasks[id]
	if task == nil {
		task = &UserTaskSummary{ID: id, Goroutines: make(map[tracev2.GoID]*GoroutineSummary)}
		s.tasks[id] = task
	}
	return task
}

// Finalize indicates to the summarizer that we're done processing the trace.
// It cleans up any remaining state and returns the full summary.
func (s *Summarizer) Finalize() *Summary {
	for _, g := range s.gs {
		g.finalize(s.lastTs, nil)

		// Sort based on region start time.
		sort.Slice(g.Regions, func(i, j int) bool {
			x := g.Regions[i].Start
			y := g.Regions[j].Start
			if x == nil {
				return true
			}
			if y == nil {
				return false
			}
			return x.Time() < y.Time()
		})
		g.goroutineSummary = nil
	}
	return &Summary{
		Goroutines: s.gs,
		Tasks:      s.tasks,
	}
}

// RelatedGoroutinesV2 finds a set of goroutines related to goroutine goid for v2 traces.
// The association is based on whether they have synchronized with each other in the Go
// scheduler (one has unblocked another).
func RelatedGoroutinesV2(events []tracev2.Event, goid tracev2.GoID) map[tracev2.GoID]struct{} {
	// Process all the events, looking for transitions of goroutines
	// out of GoWaiting. If there was an active goroutine when this
	// happened, then we know that active goroutine unblocked another.
	// Scribble all these down so we can process them.
	type unblockEdge struct {
		operator tracev2.GoID
		operand  tracev2.GoID
	}
	var unblockEdges []unblockEdge
	for _, ev := range events {
		if ev.Goroutine() == tracev2.NoGoroutine {
			continue
		}
		if ev.Kind() != tracev2.EventStateTransition {
			continue
		}
		st := ev.StateTransition()
		if st.Resource.Kind != tracev2.ResourceGoroutine {
			continue
		}
		id := st.Resource.Goroutine()
		old, new := st.Goroutine()
		if old == new || old != tracev2.GoWaiting {
			continue
		}
		unblockEdges = append(unblockEdges, unblockEdge{
			operator: ev.Goroutine(),
			operand:  id,
		})
	}
	// Compute the transitive closure of depth 2 of goroutines that have unblocked each other
	// (starting from goid).
	gmap := make(map[tracev2.GoID]struct{})
	gmap[goid] = struct{}{}
	for i := 0; i < 2; i++ {
		// Copy the map.
		gmap1 := make(map[tracev2.GoID]struct{})
		for g := range gmap {
			gmap1[g] = struct{}{}
		}
		for _, edge := range unblockEdges {
			if _, ok := gmap[edge.operand]; ok {
				gmap1[edge.operator] = struct{}{}
			}
		}
		gmap = gmap1
	}
	return gmap
}