util/osdiag: add query for Windows page file configuration and status

It's very common for OOM crashes on Windows to be caused by lack of page
file space (the NT kernel does not overcommit). Since Windows automatically
manages page file space by default, unless the machine is out of disk space,
this is typically caused by manual page file configurations that are too
small.

This patch obtains the current page file size, the amount of free page file
space, and also determines whether the page file is automatically or manually
managed.

Fixes #9090

Signed-off-by: Aaron Klotz <aaron@tailscale.com>
This commit is contained in:
Aaron Klotz 2023-08-24 12:15:20 -06:00
parent 535db01b3f
commit 6b6a8cf843
3 changed files with 92 additions and 4 deletions

View File

@ -6,6 +6,7 @@
//go:generate go run golang.org/x/sys/windows/mkwinsyscall -output zsyscall_windows.go mksyscall.go
//go:generate go run golang.org/x/tools/cmd/goimports -w zsyscall_windows.go
//sys globalMemoryStatusEx(memStatus *_MEMORYSTATUSEX) (err error) [int32(failretval)==0] = kernel32.GlobalMemoryStatusEx
//sys regEnumValue(key registry.Key, index uint32, valueName *uint16, valueNameLen *uint32, reserved *uint32, valueType *uint32, pData *byte, cbData *uint32) (ret error) [failretval!=0] = advapi32.RegEnumValueW
//sys wscEnumProtocols(iProtocols *int32, protocolBuffer *wsaProtocolInfo, bufLen *uint32, errno *int32) (ret int32) = ws2_32.WSCEnumProtocols
//sys wscGetProviderInfo(providerId *windows.GUID, infoType _WSC_PROVIDER_INFO_TYPE, info unsafe.Pointer, infoSize *uintptr, flags uint32, errno *int32) (ret int32) = ws2_32.WSCGetProviderInfo

View File

@ -45,6 +45,7 @@ func logSupportInfo(logf logger.Logf, reason LogSupportInfoReason) {
const (
supportInfoKeyModules = "modules"
supportInfoKeyPageFile = "pageFile"
supportInfoKeyRegistry = "registry"
supportInfoKeySecurity = "securitySoftware"
supportInfoKeyWinsockLSP = "winsockLSP"
@ -60,6 +61,13 @@ func getSupportInfo(w io.Writer, reason LogSupportInfoReason) error {
output[supportInfoKeyRegistry] = err
}
pageFileInfo, err := getPageFileInfo()
if err == nil {
output[supportInfoKeyPageFile] = pageFileInfo
} else {
output[supportInfoKeyPageFile] = err
}
if reason == LogSupportInfoReasonBugReport {
modInfo, err := getModuleInfo()
if err == nil {
@ -589,3 +597,72 @@ func getSecurityInfo() map[string]any {
return result
}
// _MEMORYSTATUSEX is the buffer filled in by globalMemoryStatusEx
// (kernel32.GlobalMemoryStatusEx). A pointer to it is handed directly to the
// system call, so field order and widths must not be changed.
//
// NOTE(review): the field meanings below that are not exercised by code in
// this file are presumed to follow the Win32 MEMORYSTATUSEX documentation —
// confirm against that reference before relying on them.
type _MEMORYSTATUSEX struct {
// Length must be set by the caller to the size of this struct before the
// call (getPageFileInfo does so via unsafe.Sizeof).
Length uint32
MemoryLoad uint32
TotalPhys uint64
AvailPhys uint64
// TotalPageFile is reported by getPageFileInfo as "bytesTotal".
TotalPageFile uint64
// AvailPageFile is reported by getPageFileInfo as "bytesAvailable".
AvailPageFile uint64
TotalVirtual uint64
AvailVirtual uint64
AvailExtendedVirtual uint64
}
// getPageFileInfo reports the machine's page file status as a map suitable
// for inclusion in the support-info output.
//
// The returned map always contains "bytesAvailable" and "bytesTotal", taken
// from globalMemoryStatusEx. When the page file configuration can be read via
// getEffectivePageFileValue, it also contains "autoManaged", which is true
// when at least one page file is automatically managed. A failure to read
// that configuration is not an error: the key is simply omitted.
//
// An error is returned only when globalMemoryStatusEx itself fails.
func getPageFileInfo() (map[string]any, error) {
	var status _MEMORYSTATUSEX
	status.Length = uint32(unsafe.Sizeof(status))
	if err := globalMemoryStatusEx(&status); err != nil {
		return nil, err
	}

	info := map[string]any{
		"bytesAvailable": status.AvailPageFile,
		"bytesTotal":     status.TotalPageFile,
	}

	entries, err := getEffectivePageFileValue()
	if err != nil {
		// Best effort: without the configuration we cannot tell how the page
		// files are managed, so leave "autoManaged" out entirely.
		return info, nil
	}

	// A lone single-component entry means the system manages every page file.
	soleEntry := len(entries) == 1

	autoManaged := false
	for _, entry := range entries {
		fields := strings.Split(entry, " ")
		switch len(fields) {
		case 1:
			// Only meaningful when this is the one and only entry.
			autoManaged = soleEntry
		case 3:
			// Three space-delimited components; when the latter two are both
			// "0", this particular page file is automatically managed.
			autoManaged = fields[1] == "0" && fields[2] == "0"
		}
		if autoManaged {
			// One auto-managed page file is enough.
			break
		}
	}

	info["autoManaged"] = autoManaged
	return info, nil
}
// getEffectivePageFileValue returns the page file entries that the machine is
// currently operating with, read from the Memory Management key under
// HKEY_LOCAL_MACHINE.
//
// It returns an error if the key cannot be opened, or if neither of the two
// candidate values can be read as a multi-string.
func getEffectivePageFileValue() ([]string, error) {
	const memMgmtPath = `SYSTEM\CurrentControlSet\Control\Session Manager\Memory Management`

	k, err := registry.OpenKey(registry.LOCAL_MACHINE, memMgmtPath, registry.QUERY_VALUE)
	if err != nil {
		return nil, err
	}
	defer k.Close()

	// Value names in priority order. "ExistingPageFiles" covers the rare case
	// where the user changed the page file configuration but has not rebooted
	// yet: it holds the setting still in effect. "PagingFiles" is the fallback
	// (and yes, one name really says "Page" while the other says "Paging").
	valueNames := [...]string{"ExistingPageFiles", "PagingFiles"}

	var entries []string
	for _, name := range valueNames {
		entries, _, err = k.GetStringsValue(name)
		if err == nil {
			break
		}
	}
	return entries, err
}

View File

@ -40,12 +40,14 @@ func errnoErr(e syscall.Errno) error {
// System DLL handles created with windows.NewLazySystemDLL, followed by the
// procedures looked up from them by name for use by the syscall wrappers in
// this file.
var (
modadvapi32 = windows.NewLazySystemDLL("advapi32.dll")
modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
modws2_32 = windows.NewLazySystemDLL("ws2_32.dll")
procRegEnumValueW = modadvapi32.NewProc("RegEnumValueW")
procWSCEnumProtocols = modws2_32.NewProc("WSCEnumProtocols")
procWSCGetProviderInfo = modws2_32.NewProc("WSCGetProviderInfo")
procWSCGetProviderPath = modws2_32.NewProc("WSCGetProviderPath")
procRegEnumValueW = modadvapi32.NewProc("RegEnumValueW")
procGlobalMemoryStatusEx = modkernel32.NewProc("GlobalMemoryStatusEx")
procWSCEnumProtocols = modws2_32.NewProc("WSCEnumProtocols")
procWSCGetProviderInfo = modws2_32.NewProc("WSCGetProviderInfo")
procWSCGetProviderPath = modws2_32.NewProc("WSCGetProviderPath")
)
func regEnumValue(key registry.Key, index uint32, valueName *uint16, valueNameLen *uint32, reserved *uint32, valueType *uint32, pData *byte, cbData *uint32) (ret error) {
@ -56,6 +58,14 @@ func regEnumValue(key registry.Key, index uint32, valueName *uint16, valueNameLe
return
}
// globalMemoryStatusEx calls the procedure behind procGlobalMemoryStatusEx,
// passing a pointer to memStatus as its single argument so the callee can
// fill the struct in place.
//
// It returns a non-nil error, derived from the call's errno via errnoErr,
// when the call's result (interpreted as an int32) is zero.
func globalMemoryStatusEx(memStatus *_MEMORYSTATUSEX) (err error) {
r1, _, e1 := syscall.Syscall(procGlobalMemoryStatusEx.Addr(), 1, uintptr(unsafe.Pointer(memStatus)), 0, 0)
// A zero result is the failure condition declared in the //sys directive.
if int32(r1) == 0 {
err = errnoErr(e1)
}
return
}
func wscEnumProtocols(iProtocols *int32, protocolBuffer *wsaProtocolInfo, bufLen *uint32, errno *int32) (ret int32) {
r0, _, _ := syscall.Syscall6(procWSCEnumProtocols.Addr(), 4, uintptr(unsafe.Pointer(iProtocols)), uintptr(unsafe.Pointer(protocolBuffer)), uintptr(unsafe.Pointer(bufLen)), uintptr(unsafe.Pointer(errno)), 0, 0)
ret = int32(r0)