陷入系统调用

执行系统调用抢占 #

handoffp #

handoffp函数:判断是否需要启动工作线程来接管_p_,如果不需要则把_p_放入P的全局空闲队列.

如果当前P的运行队列有任务/全局队列有任务有gc任务大概率整个系统有任务待做,或者当前P是最后一个P要用作轮询网络

其中大概率整个系统有任务待做: 由第24行 atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) == 0 决定:

如果没有P空闲,而且这些没空闲的P连接的M不是处在自旋状态(没有自旋的M),证明其他P都在工作(而不是假装工作实际是在伺机从其他队列偷任务那种),是不是说明整个系统除了当前P外都在做事,所以大概率某些P的运行队列上有任务积压,所以我们应该启动M,让它跟当前P连接,去别的P中偷任务)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
// Hands off P from syscall or locked M.
// Always runs without a P, so write barriers are not allowed.
//go:nowritebarrierrec
func handoffp(_p_ *p) {
	// handoffp must start an M in any situation where
	// findrunnable would return a G to run on _p_.

	// if it has local work, start it straight away
	// 如果本地P中运行队列不为空/全局运行队列不为空就调用startm函数
	if !runqempty(_p_) || sched.runqsize != 0 {
		startm(_p_, false) // 这个我们前面讨论过,
		return
	}
	// if it has GC work, start it straight away 
	// 如果有GC任务,就调用startm函数
	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) {
		startm(_p_, false)
		return
	}
	// no local work, check that there are no spinning/idle M's,
	// otherwise our help is not required
	// 没有自旋状态的M,且没有空闲的P(所有其它p都在运行goroutine,所有的M都在运行/睡眠)
	// 只能说明系统比较忙, ??? 需要启动m ???
	if atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) == 0 && atomic.Cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
		startm(_p_, true)
		return
	}
	lock(&sched.lock)
	if sched.gcwaiting != 0 {
		_p_.status = _Pgcstop
		sched.stopwait--
		if sched.stopwait == 0 {
			notewakeup(&sched.stopnote)
		}
		unlock(&sched.lock)
		return
	}
	if _p_.runSafePointFn != 0 && atomic.Cas(&_p_.runSafePointFn, 1, 0) {
		sched.safePointFn(_p_)
		sched.safePointWait--
		if sched.safePointWait == 0 {
			notewakeup(&sched.safePointNote)
		}
	}
	if sched.runqsize != 0 { // 全局运行队列大小不是0; 说明Goroutine需要运行,有工作要做.
		unlock(&sched.lock)
		startm(_p_, false)
		return
	}
	// If this is the last running P and nobody is polling network,
	// need to wakeup another M to poll network.
	// 如果只有当前P还在运行;需要唤醒别的M来轮询网络
	if sched.npidle == uint32(gomaxprocs-1) && atomic.Load64(&sched.lastpoll) != 0 {
		unlock(&sched.lock)
		startm(_p_, false)
		return
	}
	pidleput(_p_)  // 无事可做,把p放入全局空闲队列
	unlock(&sched.lock)
}

其中startm函数我们前面介绍过 startm

20220629170524

系统调用收尾,如从系统调用返回,如何重新得到P #

定义程序 #

main.go

package main

import (
    "fmt"
    "os"
)

var path = "appss.txt"

func isError(err error) bool {
    if err != nil {
        fmt.Println(err.Error())
    }
    return (err != nil)
}

func main() {
    var file, err = os.OpenFile(path, os.O_RDWR, 0644)
    if isError(err) {
        return
    }
    defer file.Close()
}

gdb调试前准备 #

编译程序 #

编译一下源代码: go build -gcflags "-N -l" -o test ..

准备mcall函数断点的文件 #

  • gdb
    • list /usr/lib/golang/src/syscall/zsyscall_linux_amd64.go:62
    • list /usr/lib/golang/src/syscall/asm_linux_amd64.s:44

gdb #

// func Syscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr)
TEXT ·Syscall6(SB),NOSPLIT,$0-80
	CALL	runtime·entersyscall(SB)
	MOVQ	a1+8(FP), DI
	MOVQ	a2+16(FP), SI
	MOVQ	a3+24(FP), DX
	MOVQ	a4+32(FP), R10
	MOVQ	a5+40(FP), R8
	MOVQ	a6+48(FP), R9

	SYSCALL
	CMPQ	AX, $0xfffffffffffff001
	JLS	ok6
	MOVQ	$-1, r1+56(FP)
	MOVQ	$0, r2+64(FP)
	NEGQ	AX
	MOVQ	AX, err+72(FP)
	CALL	runtime·exitsyscall(SB)
	RET
ok6:
	MOVQ	AX, r1+56(FP)
	MOVQ	DX, r2+64(FP)
	MOVQ	$0, err+72(FP)
	CALL	runtime·exitsyscall(SB)
  RET

entersyscall #

src/runtime/proc.go

func entersyscall() {
	reentersyscall(getcallerpc(), getcallersp()) // 这个是Goroutine的pc, sp,不是g0的,因为还没有切换栈。
}

reentersyscall #

  • reentersyscall主要是做三件事:
    • 把PC,SP保存到当前Goroutine.sched里面;
    • 解除M与P两者之间的关系;
    • 设置P的状态为_Psyscall
/*
- 把PC,SP保存到当前Goroutine.sched里面;
- 解除M与P两者之间的关系;
- 设置P的状态为_Psyscall
*/
func reentersyscall(pc, sp uintptr) {
	_g_ := getg() // get Goroutine的g

	// Disable preemption because during this function g is in Gsyscall status,
	// but can have inconsistent g->sched, do not let GC observe it.
	_g_.m.locks++ // ++就能让GC不能观察到?TODO zxc:

	// Entersyscall must not call any function that might split/grow the stack.
	// (See details in comment above.)
	// Catch calls that might, by replacing the stack guard with something that
	// will trip any stack check and leaving a flag to tell newstack to die.
	_g_.stackguard0 = stackPreempt //进入系统调用前就设置了抢占标志。
	_g_.throwsplit = true

	// Leave SP around for GC and traceback.
	save(pc, sp) //保存寄存器的值到当前Goroutine的sched结构体。
	_g_.syscallsp = sp //gc使用
	_g_.syscallpc = pc //gc使用
	casgstatus(_g_, _Grunning, _Gsyscall) // 修改状态
	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
		systemstack(func() {
			print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
			throw("entersyscall")
		})
	}

	if trace.enabled {
		systemstack(traceGoSysCall)
		// systemstack itself clobbers g.sched.{pc,sp} and we might
		// need them later when the G is genuinely blocked in a
		// syscall
		save(pc, sp)
	}

	if atomic.Load(&sched.sysmonwait) != 0 {
		systemstack(entersyscall_sysmon)
		save(pc, sp)
	}

	if _g_.m.p.ptr().runSafePointFn != 0 {
		// runSafePointFn may stack split if run on this stack
		systemstack(runSafePointFn)
		save(pc, sp)
	}

	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick //把P的syscalltick,放到m中。
	_g_.sysblocktraced = true
	_g_.m.mcache = nil
	pp := _g_.m.p.ptr()
	pp.m = 0 // 解除P与M的关系。
	_g_.m.oldp.set(pp) // 把现在的P放到M中的oldp中。
	_g_.m.p = 0 // 解除M与P的关系。
	atomic.Store(&pp.status, _Psyscall) // 修改P的状态为系统调用。
	if sched.gcwaiting != 0 {
		systemstack(entersyscall_gcwait)
		save(pc, sp)
	}

	_g_.m.locks-- // --解除锁定。
}
  • 这里需要注意的是:在进入系统调用的时候,它是没有进行自增的,它是在exitsyscall()函数才开始进行自增的;

  • 这个就是为了判断P,在当前Goroutine进入系统调用,到返回的那一段时间,这个P有可能又被其他M关联,然后又进入_Psyscall状态,_g_.m.syscalltick = _g_.m.p.ptr().syscalltick //把P的syscalltick,放到m中。

exitsyscall #

  • 这个退出系统调用:
    • 尝试重新绑定oldp,如果没有成功,从全局空闲P队列获得一个P。
    • 如果还是失败,mcall–>exitsyscall0(),
      • 在这个里面再次从全局空闲P队列中尝试下,如果失败就把Goroutine放入全局空闲G队列;
      • M放入全局空闲M队列,休眠M;
      • schedule().

/*
这个退出系统调用:
  - 尝试重新绑定oldp,如果没有成功,从全局空闲P队列获得一个P。
  - 如果还是失败,mcall-->exitsyscall0(),
    - 在这个里面再次从全局空闲P队列中尝试下,如果失败就把Goroutine放入全局空闲G队列;
    - M放入全局空闲M队列,休眠M;
    - schedule().
*/
func exitsyscall() {
	_g_ := getg()

	_g_.m.locks++ // see comment in entersyscall 防止GC? TODO zxc:
	if getcallersp() > _g_.syscallsp {
		throw("exitsyscall: syscall frame is no longer valid")
	}

	_g_.waitsince = 0
	oldp := _g_.m.oldp.ptr() //重新取出oldp
	_g_.m.oldp = 0
	if exitsyscallfast(oldp) { //如果返回true,那么M与P在这个里面已经重新关联了。
		if _g_.m.mcache == nil {
			throw("lost mcache")
		}
		if trace.enabled {
			if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
				systemstack(traceGoStart)
			}
		}
		// There's a cpu for us, so we can run.
		_g_.m.p.ptr().syscalltick++ //系统调用完成,syscalltick自增。
		// We need to cas the status and scan before resuming...
		casgstatus(_g_, _Gsyscall, _Grunning)

		// Garbage collector isn't running (since we are),
		// so okay to clear syscallsp.
		_g_.syscallsp = 0
		_g_.m.locks--
		if _g_.preempt {
			// restore the preemption request in case we've cleared it in newstack
			_g_.stackguard0 = stackPreempt
		} else {
			// otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock
			_g_.stackguard0 = _g_.stack.lo + _StackGuard //在entersyscall里面我们设置_g_.stackguard0 = stackPreempt //进入系统调用前就设置了抢占标志。这里要恢复。
		}
		_g_.throwsplit = false

		if sched.disable.user && !schedEnabled(_g_) {
			// Scheduling of this goroutine is disabled.
			Gosched()
		}

		return
	}

	_g_.sysexitticks = 0
	if trace.enabled {
		// Wait till traceGoSysBlock event is emitted.
		// This ensures consistency of the trace (the goroutine is started after it is blocked).
		for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
			osyield()
		}
		// We can't trace syscall exit right now because we don't have a P.
		// Tracing code can invoke write barriers that cannot run without a P.
		// So instead we remember the syscall exit time and emit the event
		// in execute when we have a P.
		_g_.sysexitticks = cputicks()
	}

	_g_.m.locks--

	// Call the scheduler.
	mcall(exitsyscall0)

	if _g_.m.mcache == nil {
		throw("lost mcache")
	}

	// Scheduler returned, so we're allowed to run now.
	// Delete the syscallsp information that we left for
	// the garbage collector during the system call.
	// Must wait until now because until gosched returns
	// we don't know for sure that the garbage collector
	// is not running.
	_g_.syscallsp = 0
	_g_.m.p.ptr().syscalltick++
	_g_.throwsplit = false
}
exitsyscallfast #
//go:nosplit
func exitsyscallfast(oldp *p) bool {
	_g_ := getg()

	// Freezetheworld sets stopwait but does not retake P's.
	if sched.stopwait == freezeStopWait {
		return false
	}

	// Try to re-acquire the last P.
	if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
		/*
			- 查看老的P的状态是否是正处于_Psyscall;
		      - 从reentersyscall里面的三个步骤,当它设置为_Psyscall, 它这个时候是没有与任何M相关联。
		      - 所以这里如果发现P又处于_psyscall,直接关联。
		*/
		// There's a cpu for us, so we can run.
		wirep(oldp) // 关联M和P;当前的M和这个oldp。
		exitsyscallfast_reacquired()
		return true
	}

	// Try to get any other idle P.
	if sched.pidle != 0 {
		var ok bool
		systemstack(func() {
			ok = exitsyscallfast_pidle()
			if ok && trace.enabled {
				if oldp != nil {
					// Wait till traceGoSysBlock event is emitted.
					// This ensures consistency of the trace (the goroutine is started after it is blocked).
					for oldp.syscalltick == _g_.m.syscalltick {
						osyield()
					}
				}
				traceGoSysExit(0)
			}
		})
		if ok {
			return true
		}
	}
	return false
}
exitsyscallfast_reacquired #

func exitsyscallfast_reacquired() {
	_g_ := getg()
	if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick { // 如果他们两者不相等,那么说明该p被收回,然后再次进入syscall(因为_g_.m.syscalltick变了)
		if trace.enabled {
			// The p was retaken and then enter into syscall again (since _g_.m.syscalltick has changed).
			// traceGoSysBlock for this syscall was already emitted,
			// but here we effectively retake the p from the new syscall running on the same p.
			systemstack(func() {
				// Denote blocking of the new syscall.
				traceGoSysBlock(_g_.m.p.ptr())
				// Denote completion of the current syscall.
				traceGoSysExit(0)
			})
		}
		_g_.m.p.ptr().syscalltick++ // 这里又开始自增了--->因为它在进入reentersyscall()函数是不能增加这个值的。只有当退出exitsyscall()函数才会自增,所以如果
	}
}

mcall(exitsyscall0) #

// exitsyscall slow path on g0.
// Failed to acquire P, enqueue gp as runnable.
//
//go:nowritebarrierrec
func exitsyscall0(gp *g) {
	_g_ := getg()

	casgstatus(gp, _Gsyscall, _Grunnable) //从系统调用状态转变为可运行状态
	dropg() //断开M与G之间的关系
	lock(&sched.lock) //要修改全局的sched,先加锁
	var _p_ *p
	if schedEnabled(_g_) {
		_p_ = pidleget() //从全局空闲P队列获取一个P
	}
	if _p_ == nil {
		globrunqput(gp) //如果没有获取P,那么把Goroutine放入全局空闲g队列。
	} else if atomic.Load(&sched.sysmonwait) != 0 {
		atomic.Store(&sched.sysmonwait, 0)
		notewakeup(&sched.sysmonnote)
	}
	unlock(&sched.lock)
	if _p_ != nil { //如果有获取到P。
		acquirep(_p_) // 关联P与M
		execute(gp, false) // Never returns. 直接执行
	}
	if _g_.m.lockedg != 0 { // TODO zxc: 我记得是这个某个g,必须运行在某个线程上面,比如,main.main.
		// Wait until another thread schedules gp and so m again.
		stoplockedm()
		execute(gp, false) // Never returns.
	}
	stopm() //停止M。
	schedule() // Never returns.
}

syscalltick #

这个syscalltick;发现不是每次系统调用一次,才增加一次。

在我们这里,

  • entersystem
    • g.m.syscalltick = g.m.p.ptr().syscalltick
  • existsystem
    • exitsyscall主函数里面有一次;
    • exitsyscallfast_reacquired函数又增加了一次.

// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock, // we remember current value of syscalltick in m (g.m.syscalltick = g.m.p.ptr().syscalltick), // whoever emits traceGoSysBlock increments p.syscalltick afterwards; // and we wait for the increment before emitting traceGoSysExit. // Note that the increment is done even if tracing is not enabled, // because tracing can be enabled in the middle of syscall. We don’t want the wait to hang.

// 为了确保traceGoSysExit严格在traceGoSysBlock之后发出。 // 我们记住m中syscalltick的当前值(g.m.syscalltick = g.m.p.ptr().syscalltick)。 // 不管是谁发出traceGoSysBlock,都会在之后增量p.syscalltick。 // 我们等待增量后再发出 traceGoSysExit。 // 注意,即使没有启用跟踪,增量也会被完成。 // 因为跟踪可以在syscall中间启用。我们不希望等待被挂起。

在这个解释里面,发现跟踪的时候也会syscalltick

syscall返回