开启系统监控 #

下面是main goroutine执行的主函数(我们在初始化一章已经提过)，此函数之后会调用用户定义的main函数

它的里面可以看到调用了newm函数生成新M,且新M将执行sysmon函数

// main is the function executed by the main goroutine; this listing is an
// excerpt and the elided parts are marked //...
func main() {
    //...

	// Start the monitor: via systemstack, call newm to create a new M whose
	// entry function is sysmon.
	if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon
		systemstack(func() {
			newm(sysmon, nil)
		})
    }

    //...
}

sysmon #

可以看到sysmon里面是一个for循环,它会一直运行。因为这个M没有关联P，所以不允许写屏障。逻辑比较简单：延时睡眠，然后调用retake

// sysmon is the monitor loop. Each iteration sleeps for delay microseconds
// and then calls retake; the sleep backs off while retake keeps returning 0.
// This listing is an excerpt and the elided parts are marked //...
//
// Always runs without a P, so write barriers are not allowed.
//
//go:nowritebarrierrec
func sysmon() {
	lock(&sched.lock)
	sched.nmsys++ // increment the count of system threads
	checkdead()
	unlock(&sched.lock)

	lasttrace := int64(0)
	idle := 0 // how many cycles in succession we had not wokeup somebody
	delay := uint32(0)
	for {
		if idle == 0 { // start with 20us sleep...
			delay = 20
		} else if idle > 50 { // start doubling the sleep after 1ms...
			delay *= 2
		}
		if delay > 10*1000 { // up to 10ms
			delay = 10 * 1000
		}
        usleep(delay)
        
        //...

		// retake P's blocked in syscalls
		// and preempt long running G's
		// (now is not assigned in this excerpt; it comes from the elided
		// code above, presumably the current time in nanoseconds; confirm
		// against the runtime source.)
		// A non-zero result resets idle, which brings the sleep back to
		// 20us; otherwise idle keeps counting up.
		if retake(now) != 0 {
			idle = 0
		} else {
			idle++
		}
        // check if we need to force a GC
        //...
	}
}

retake #

retake是怎么区分是否是本次调度一直在运行?

通过p结构体里面的sysmontick保存p结构体中schedtick的快照，下次再比较两者就能判断期间是否发生过调度，参见下面的16-31行

// sysmontick holds sysmon's last observation of a P. retake compares these
// snapshots with the P's live schedtick/syscalltick counters to tell whether
// anything changed since the previous pass.
type sysmontick struct {
	schedtick   uint32 // snapshot of the P's schedtick taken by retake
	schedwhen   int64  // value of now when the schedtick snapshot was last refreshed
	syscalltick uint32 // snapshot of the P's syscalltick taken by retake
	syscallwhen int64  // value of now when the syscalltick snapshot was last refreshed
}

retake怎么判断是否应该抢断?

只有P是_Prunning/_Psyscall状态,才会进行抢占
一种是用户代码运行太久会被抢占(参见下方的25行):如果pd.schedwhen ～ now这个时间段大于阈值forcePreemptNS,就会调用preemptone函数做抢断准备
一种是进入了系统调用的抢占:主要思想是如果P接下来没有其他工作可做(本地G队列为空了),这时候抢占它没有意义；但另一方面最终还是要抢占它，因为一直停留在系统调用中的P会妨碍sysmon线程进入深度睡眠(上文有分析retake的返回参数能决定sysmon的睡眠时长)

retake函数判断不进行系统剥夺抢占逻辑，由第50行代码决定: runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now

_p_的本地运行队列没有Gs; runqempty(_p_)返回true
有空闲的P,或者有正在自旋状态的M(正在偷其他P队列的Gs); atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0返回true
上次观测到的系统调用还没有超过10毫秒; pd.syscallwhen+10*1000*1000 > now返回true

所以当程序没有工作需要做,且系统调用没有超过10ms就不进行系统调用抢占; 上式前两项说明这个程序没有工作需要做; 最后一项说明系统调用还没超过10ms

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
func retake(now int64) uint32 { // walks all Ps: requests preemption of long-running Gs and retakes Ps stuck in syscalls
	n := 0 // number of Ps taken back from syscalls during this pass; this is the return value
	// Prevent allp slice changes. This lock will be completely
	// uncontended unless we're already stopping the world.
	lock(&allpLock)
	// We can't use a range loop over allp because we may
	// temporarily drop the allpLock. Hence, we need to re-fetch
	// allp each time around the loop.
	for i := 0; i < len(allp); i++ { // walk every P
		_p_ := allp[i]
		if _p_ == nil {
			// This can happen if procresize has grown
			// allp but not yet created new Ps.
			continue
		}
		pd := &_p_.sysmontick // the ticks sysmon observed for this P last time
		s := _p_.status
		sysretake := false
		if s == _Prunning || s == _Psyscall { // only Ps in _Prunning or _Psyscall are candidates for preemption
			// Preempt G if it's running for too long.
			t := int64(_p_.schedtick)  // _p_.schedtick: the scheduler adds one to it on every scheduling round
			if int64(pd.schedtick) != t { // sysmon sees a new scheduling round, so reset its schedtick/schedwhen snapshot
				pd.schedtick = uint32(t)
				pd.schedwhen = now
			} else if pd.schedwhen+forcePreemptNS <= now { //  1. the first branch was not taken, so pd.schedtick == t: no scheduling happened between pd.schedwhen and now;
				preemptone(_p_)                            //  2. i.e. one goroutine has been running on this P the whole time; a switch would have bumped _p_.schedtick and taken the first branch;
				// In case of syscall, preemptone() doesn't // 3. it has run for at least forcePreemptNS (10ms per the original note), so set a preemption request.
				// work, because there is no M wired to P.
				sysretake = true   // a syscall retake is needed
			}
		}
		if s == _Psyscall { // the P is in a syscall; check whether it should be retaken
			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
			t := int64(_p_.syscalltick) // counts syscalls; mainly incremented by the worker thread when it finishes a syscall
			if !sysretake && int64(pd.syscalltick) != t { // differs: this is not the syscall observed last time but a new one, so reset the snapshot
				pd.syscalltick = uint32(t)
				pd.syscallwhen = now
				continue
			}
			// On the one hand we don't want to retake Ps if there is no other work to do,
			// but on the other hand we want to retake them eventually
			// because they can prevent the sysmon thread from deep sleep.

			// 1. _p_'s local run queue holds no Gs: runqempty(_p_) is true
			// 2. there is an idle P or a spinning M (one stealing Gs from other Ps' queues): atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 is true
			// 3. the last observed syscall is less than 10ms old: pd.syscallwhen+10*1000*1000 > now is true
			// - conclusion: when the program has no work to do and the syscall has lasted less than 10ms, skip the syscall retake.
			//   - 1 and 2 mean the program has no work to do;
			//   - 3 means the syscall has not yet exceeded 10ms
			if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
				continue
			}
			// Drop allpLock so we can take sched.lock.
			unlock(&allpLock)
			// Need to decrement number of idle locked M's
			// (pretending that one more is running) before the CAS.
			// Otherwise the M from which we retake can exit the syscall,
			// increment nmidle and report deadlock.
			incidlelocked(-1)
			if atomic.Cas(&_p_.status, s, _Pidle) { // retake needed: claim the P by CAS-ing its status to _Pidle
				if trace.enabled {                  // CAS is used because the worker thread may be returning from the syscall right now and claiming the P too
					traceGoSysBlock(_p_)
					traceProcStop(_p_)
				}
				n++
				_p_.syscalltick++
				handoffp(_p_)  // find a new M to take over the P
			}
			incidlelocked(1)
			lock(&allpLock)
		}
	}
	unlock(&allpLock)
	return uint32(n)
}

下面两章将分别说说两种不同的剥夺抢占。

用户执行过久: 第26行的preemptone函数
陷入系统调用: 第67行的handoffp函数