monitoring.go

219 lines
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
package internal

import (
	"fmt"
	"os"
	"os/exec"
	"runtime"
	"strings"
	"sync"
	"syscall"
	"time"
)

// SystemStats holds system resource information.
//
// It is a point-in-time snapshot assembled by GetSystemStats. Sizes are
// pre-formatted, human-readable strings produced by formatBytes (for example
// "1.5 GB"); percentages are integers obtained by truncating division. Any
// field whose underlying source could not be read keeps its zero value.
type SystemStats struct {
	Hostname    string // result of readHostname: configured domain, Docker host name, or OS hostname
	OS          string // runtime.GOOS
	Arch        string // runtime.GOARCH
	NumCPU      int    // runtime.NumCPU()
	CPUPercent  int    // latest busy percentage published by the background CPU sampler
	MemTotal    string // MemTotal from /proc/meminfo
	MemUsed     string // MemTotal minus MemAvailable
	MemPercent  int    // MemUsed as a percentage of MemTotal
	DiskTotal   string // size of the root filesystem ("/")
	DiskUsed    string // root size minus the space reported as available (Bavail)
	DiskPercent int    // DiskUsed as a percentage of DiskTotal
	DataTotal   string // data volume (if separate from root)
	DataUsed    string // data volume size minus the space reported as available
	DataPercent int    // DataUsed as a percentage of DataTotal
	HasDataDisk bool   // true when data dir is on a different filesystem
	LoadAvg     string // first field of /proc/loadavg, kept as the raw text
}

// CPU sampling state shared between the background sampler started in init
// (the writer) and GetSystemStats (the reader).
var (
	cpuMu      sync.Mutex // held by the sampler while updating, and by GetSystemStats while reading cpuPercent
	lastCPU    cpuSample  // previous /proc/stat reading; baseline for the next delta
	cpuPercent int        // busy share of the most recent sampling interval, in percent
)

// cpuSample is one reading of the aggregate "cpu" line of /proc/stat, as
// produced by readCPUSample. A sample whose counters are all zero means the
// line could not be read or parsed.
type cpuSample struct {
	total uint64    // sum of the counters read from the line
	idle  uint64    // the idle counter on its own
	time  time.Time // when the sample was taken
}

// init seeds the CPU baseline and starts the background sampler that keeps
// cpuPercent current.
//
// The sampler goroutine runs for the lifetime of the process; nothing in this
// function stops it. Every two seconds it reads a fresh sample and publishes
// the busy share of the elapsed interval under cpuMu.
func init() {
	// Seed initial CPU sample. No lock is needed: the sampler goroutine has
	// not been started yet.
	lastCPU = readCPUSample()

	// Background CPU sampler — updates every 2 seconds
	go func() {
		for {
			time.Sleep(2 * time.Second)
			cur := readCPUSample()
			if cur.total == 0 {
				// The read or parse failed. Keep the previous baseline and
				// percentage rather than replacing the baseline with zeros,
				// which would make the next good sample report the average
				// since boot instead of the last interval.
				continue
			}

			cpuMu.Lock()
			// A zero baseline means the seed read failed; in that case this
			// sample only establishes the baseline.
			if lastCPU.total > 0 && cur.total > lastCPU.total {
				totalDelta := cur.total - lastCPU.total

				// The counters are unsigned, so guard both subtractions:
				// idle may step backwards, and it may advance by more than
				// the total when another counter decreases.
				var idleDelta uint64
				if cur.idle > lastCPU.idle {
					idleDelta = cur.idle - lastCPU.idle
				}
				if idleDelta > totalDelta {
					idleDelta = totalDelta
				}

				cpuPercent = int((totalDelta - idleDelta) * 100 / totalDelta)
			}
			lastCPU = cur
			cpuMu.Unlock()
		}
	}()
}

// GetSystemStats reads system metrics from /proc and syscall.
//
// Every probe is best-effort: when a source cannot be read, the fields it
// feeds keep their zero values and the remaining probes still run. The
// function never returns nil.
func GetSystemStats() *SystemStats {
	hostname := readHostname()

	// The CPU figure is maintained by the background sampler; copy it out
	// under the lock instead of sampling here.
	cpuMu.Lock()
	cpu := cpuPercent
	cpuMu.Unlock()

	stats := &SystemStats{
		Hostname:   hostname,
		OS:         runtime.GOOS,
		Arch:       runtime.GOARCH,
		NumCPU:     runtime.NumCPU(),
		CPUPercent: cpu,
	}

	// Memory from /proc/meminfo (values are reported in kB).
	if data, err := os.ReadFile("/proc/meminfo"); err == nil {
		var memTotal, memAvailable uint64
		for _, line := range strings.Split(string(data), "\n") {
			fields := strings.Fields(line)
			if len(fields) < 2 {
				continue
			}
			switch fields[0] {
			case "MemTotal:":
				memTotal = parseUint(fields[1]) * 1024
			case "MemAvailable:":
				memAvailable = parseUint(fields[1]) * 1024
			}
		}
		if memTotal > 0 {
			// Clamp so an inconsistent reading cannot wrap the unsigned
			// subtraction below into a huge "used" figure.
			if memAvailable > memTotal {
				memAvailable = memTotal
			}
			memUsed := memTotal - memAvailable
			stats.MemTotal = formatBytes(memTotal)
			stats.MemUsed = formatBytes(memUsed)
			stats.MemPercent = int(memUsed * 100 / memTotal)
		}
	}

	// Disk usage — root filesystem
	var rootStat syscall.Statfs_t
	if err := syscall.Statfs("/", &rootStat); err == nil {
		if total, used, percent, ok := statfsUsage(&rootStat); ok {
			stats.DiskTotal = total
			stats.DiskUsed = used
			stats.DiskPercent = percent
		}
	}

	// Data volume — if on a separate filesystem from root. If the root probe
	// failed, rootStat is still zeroed and the comparison is against a zero
	// filesystem ID.
	var dataStat syscall.Statfs_t
	if err := syscall.Statfs(dataDir, &dataStat); err == nil && dataStat.Fsid != rootStat.Fsid {
		stats.HasDataDisk = true
		if total, used, percent, ok := statfsUsage(&dataStat); ok {
			stats.DataTotal = total
			stats.DataUsed = used
			stats.DataPercent = percent
		}
	}

	// Load average from /proc/loadavg; only the first field is kept.
	if data, err := os.ReadFile("/proc/loadavg"); err == nil {
		if fields := strings.Fields(string(data)); len(fields) >= 1 {
			stats.LoadAvg = fields[0]
		}
	}

	return stats
}

// statfsUsage turns a populated Statfs_t into a formatted total size, a
// formatted used size and a used percentage. "Used" is the total minus the
// space reported in Bavail. ok is false when the filesystem reports a size
// of zero, in which case the other results are zero values.
func statfsUsage(st *syscall.Statfs_t) (total, used string, percent int, ok bool) {
	blockSize := uint64(st.Bsize)
	totalBytes := st.Blocks * blockSize
	if totalBytes == 0 {
		return "", "", 0, false
	}
	availBytes := st.Bavail * blockSize
	// Clamp so the unsigned subtraction below cannot wrap.
	if availBytes > totalBytes {
		availBytes = totalBytes
	}
	usedBytes := totalBytes - availBytes
	return formatBytes(totalBytes), formatBytes(usedBytes), int(usedBytes * 100 / totalBytes), true
}

// readHostname returns a meaningful hostname. Inside Docker, os.Hostname()
// returns the container ID, so we try the system domain or docker host name.
//
// Sources are tried in order and the first non-empty answer wins:
//  1. SystemDomain()
//  2. the name printed by `docker info --format {{.Name}}`
//  3. os.Hostname()
//
// The docker lookup is bounded by a timeout, so an unresponsive Docker
// daemon cannot block the caller indefinitely. The result may be empty if
// every source fails.
func readHostname() string {
	// Upper bound on how long the docker CLI may take to answer.
	const dockerInfoTimeout = 3 * time.Second

	// Prefer the configured domain
	if d := SystemDomain(); d != "" {
		return d
	}

	// Try reading the Docker host's hostname via docker info
	var out strings.Builder
	cmd := exec.Command("docker", "info", "--format", "{{.Name}}")
	cmd.Stdout = &out
	if err := cmd.Start(); err == nil {
		// Kill the child if it overruns. Kill on a process that has already
		// exited only returns an error, which is deliberately ignored.
		killer := time.AfterFunc(dockerInfoTimeout, func() { _ = cmd.Process.Kill() })
		err = cmd.Wait()
		killer.Stop()
		if err == nil {
			if name := strings.TrimSpace(out.String()); name != "" {
				return name
			}
		}
	}

	// Last resort. The error is ignored on purpose: on failure the hostname
	// is empty, and that empty string is what callers receive.
	hostname, _ := os.Hostname()
	return hostname
}

// readCPUSample reads aggregate CPU jiffies from /proc/stat.
//
// It parses the first line of the file, which must start with the "cpu"
// label, and returns the sum of its counters together with the idle counter.
// If the file cannot be read or the line does not have the expected shape,
// the returned sample has zero counters and only the timestamp set.
func readCPUSample() cpuSample {
	const (
		idleColumn = 3 // idle is the 4th value (index 3)
		// Only the first eight columns are summed. The later guest and
		// guest_nice columns are already accounted by the kernel inside
		// user and nice, so adding them would count that time twice.
		countedColumns = 8
	)

	data, err := os.ReadFile("/proc/stat")
	if err != nil {
		return cpuSample{time: time.Now()}
	}

	// First line: cpu user nice system idle iowait irq softirq steal
	line := string(data)
	if nl := strings.IndexByte(line, '\n'); nl >= 0 {
		line = line[:nl]
	}
	fields := strings.Fields(line)
	if len(fields) < 5 || fields[0] != "cpu" {
		return cpuSample{time: time.Now()}
	}

	counters := fields[1:]
	if len(counters) > countedColumns {
		counters = counters[:countedColumns]
	}

	var total, idle uint64
	for i, f := range counters {
		val := parseUint(f)
		total += val
		if i == idleColumn {
			idle = val
		}
	}
	return cpuSample{total: total, idle: idle, time: time.Now()}
}

// parseUint builds an unsigned decimal number from the ASCII digits in s.
//
// It is deliberately lenient: every character that is not '0'..'9' is
// skipped rather than treated as an error, so "16 kB" yields 16 and "1a2"
// yields 12. An input without digits yields 0. No overflow check is made;
// values beyond the uint64 range wrap around.
func parseUint(s string) uint64 {
	var value uint64
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c < '0' || c > '9' {
			// Not an ASCII digit (this includes every byte of a multi-byte
			// UTF-8 sequence): ignore it.
			continue
		}
		value = value*10 + uint64(c-'0')
	}
	return value
}

// formatBytes renders a byte count as a short human-readable string.
//
// Units are binary (1 KB = 1024 B). The largest unit that does not exceed b
// is used, with one decimal place; counts below 1 KB are printed as a whole
// number of bytes. TB is the largest unit, so bigger values stay in TB.
func formatBytes(b uint64) string {
	// Ordered from largest to smallest so the first match is the right unit.
	units := [...]struct {
		threshold uint64
		format    string
	}{
		{1 << 40, "%.1f TB"},
		{1 << 30, "%.1f GB"},
		{1 << 20, "%.1f MB"},
		{1 << 10, "%.1f KB"},
	}

	for _, u := range units {
		if b >= u.threshold {
			return fmt.Sprintf(u.format, float64(b)/float64(u.threshold))
		}
	}
	return fmt.Sprintf("%d B", b)
}