diff --git a/Cargo.lock b/Cargo.lock
index c73ba5a..85a05ef 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2187,7 +2187,7 @@ dependencies = [
 
 [[package]]
 name = "socktop_agent"
-version = "1.40.5"
+version = "1.40.6"
 dependencies = [
  "anyhow",
  "assert_cmd",
diff --git a/socktop_agent/Cargo.toml b/socktop_agent/Cargo.toml
index 11a97cf..bc31867 100644
--- a/socktop_agent/Cargo.toml
+++ b/socktop_agent/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "socktop_agent"
-version = "1.40.5"
+version = "1.40.6"
 authors = ["Jason Witty "]
 description = "Remote system monitor over WebSocket, TUI like top"
 edition = "2021"
diff --git a/socktop_agent/src/metrics.rs b/socktop_agent/src/metrics.rs
index 74f96b7..5a1f8ea 100644
--- a/socktop_agent/src/metrics.rs
+++ b/socktop_agent/src/metrics.rs
@@ -413,18 +413,12 @@ pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
 /// Collect all processes (non-Linux): use sysinfo's internal CPU% by doing a double refresh.
 #[cfg(not(target_os = "linux"))]
 pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
-    use tokio::time::sleep;
     let ttl_ms: u64 = std::env::var("SOCKTOP_AGENT_PROCESSES_TTL_MS")
         .ok()
         .and_then(|v| v.parse().ok())
         .unwrap_or(2_000);
-    // Delay between the two refresh calls used to compute CPU% (ms). Smaller delay lowers
-    // accuracy slightly but reduces overall CPU overhead. Default 180ms.
-    let delay_ms: u64 = std::env::var("SOCKTOP_AGENT_PROC_CPU_DELAY_MS")
-        .ok()
-        .and_then(|v| v.parse().ok())
-        .unwrap_or(180);
     let ttl = StdDuration::from_millis(ttl_ms);
+    // Serve from cache if fresh
     {
         let cache = state.cache_processes.lock().await;
         if cache.is_fresh(ttl) {
@@ -433,38 +427,23 @@ pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
             }
         }
     }
-    // First refresh: everything (establish baseline including memory/name etc.)
-    {
-        let mut sys = state.sys.lock().await;
-        // Limit to CPU + memory for baseline (avoids gathering env/cwd/cmd each time)
-        let kind = ProcessRefreshKind::nothing().with_cpu().with_memory();
-        sys.refresh_processes_specifics(ProcessesToUpdate::All, false, kind);
-    }
-    // Sleep briefly to allow cpu deltas to accumulate; 200-250ms is typical; we keep 200ms to lower agent overhead.
-    sleep(Duration::from_millis(delay_ms.min(500))).await;
-    // Second refresh: only CPU counters (lighter than full everything) to reduce overhead.
+
+    // Single refresh approach: rely on sysinfo's internal previous snapshot (so first call yields 0s, subsequent calls valid).
     let (total_count, procs) = {
         let mut sys = state.sys.lock().await;
-        // Build a lightweight refresh kind: only CPU times.
-        let cpu_only = ProcessRefreshKind::nothing().with_cpu();
-        sys.refresh_processes_specifics(ProcessesToUpdate::All, false, cpu_only);
-        // Refresh global CPU usage once for scaling heuristic
-        sys.refresh_cpu_usage();
+        let kind = ProcessRefreshKind::nothing().with_cpu().with_memory();
+        sys.refresh_processes_specifics(ProcessesToUpdate::All, false, kind);
+        sys.refresh_cpu_usage(); // update global so scaling comparison uses same interval
+
         let total_count = sys.processes().len();
         let norm = normalize_cpu_enabled();
-        let cores = sys.cpus().len().max(1) as f32;
         let mut list: Vec<ProcessInfo> = sys
             .processes()
             .values()
             .map(|p| {
                 let raw = p.cpu_usage();
-                // sysinfo (non-Linux) returns aggregated CPU% fraction of total machine (0..100).
-                // Present multi-core semantics by multiplying by logical core count unless normalized.
-                let cpu = if norm {
-                    raw.clamp(0.0, 100.0)
-                } else {
-                    (raw * cores).clamp(0.0, 100.0 * cores)
-                };
+                // Treat raw as share of total machine (0..100). Normalization flag currently just clamps.
+                let cpu = if norm { raw.clamp(0.0, 100.0) } else { raw };
                 ProcessInfo {
                     pid: p.pid().as_u32(),
                     name: p.name().to_string_lossy().into_owned(),
@@ -473,7 +452,19 @@ pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
                 }
             })
             .collect();
-        // No scaling heuristic needed in multi-core mode; sums may exceed 100.
+        // Optional global reconciliation: align sum of per-process CPU with global if significantly off (e.g. factor >1.2 or <0.8)
+        let sum: f32 = list.iter().map(|p| p.cpu_usage).sum();
+        let global = sys.global_cpu_usage();
+        if sum > 0.0 && global > 0.0 {
+            let ratio = global / sum; // if <1, we are over-summing; if >1 under-summing
+            if ratio < 0.8 || ratio > 1.2 {
+                // scale gently toward global but not fully (to reduce jitter)
+                let adj = (ratio * 0.5) + 0.5; // halfway to target
+                for p in &mut list {
+                    p.cpu_usage = (p.cpu_usage * adj).clamp(0.0, 100.0);
+                }
+            }
+        }
         (total_count, list)
     };
     let payload = ProcessesPayload {