MACOS / NON LINUX metrics optimizations.

2025-08-27 16:00:29 -07:00 · 2025-08-27 16:00:29 -07:00 · 55e5c708fe
commit 55e5c708fe
parent 2d17cf1598
5 changed files with 186 additions and 47 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2187,7 +2187,7 @@ dependencies = [
 [[package]]
 name = "socktop_agent"
-version = "1.40.6"
+version = "1.40.61"
 dependencies = [
 "anyhow",
 "assert_cmd",
--- a/socktop_agent/Cargo.toml
+++ b/socktop_agent/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "socktop_agent"
-version = "1.40.6"
+version = "1.40.61"
 authors = ["Jason Witty <jasonpwitty+socktop@proton.me>"]
 description = "Remote system monitor over WebSocket, TUI like top"
 edition = "2021"
--- a/socktop_agent/src/metrics.rs
+++ b/socktop_agent/src/metrics.rs
@ -16,16 +16,7 @@ use std::time::{Duration, Instant};
 use sysinfo::{ProcessRefreshKind, ProcessesToUpdate};
 use tracing::warn;
-// CPU normalization only relevant for non-Linux (Linux path uses /proc deltas fixed to 0..100 per process)
+// NOTE: CPU normalization env removed; non-Linux now always reports per-process share (0..100) as given by sysinfo.
 /// Reports whether CPU normalization was requested via the environment.
 ///
 /// `SOCKTOP_AGENT_NORMALIZE_CPU` is read on the first call only and the answer is
 /// memoized: any value other than `"0"` enables normalization, while an unset or
 /// unreadable variable leaves it disabled.
 #[cfg(not(target_os = "linux"))]
 fn normalize_cpu_enabled() -> bool {
    static ON: OnceCell<bool> = OnceCell::new();
    *ON.get_or_init(|| match std::env::var("SOCKTOP_AGENT_NORMALIZE_CPU") {
        Ok(value) => value != "0",
        Err(_) => false,
    })
 }
 // Runtime toggles (read once)
 fn gpu_enabled() -> bool {
    static ON: OnceCell<bool> = OnceCell::new();
@ -410,14 +401,28 @@ pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
    payload
 }
-/// Collect all processes (non-Linux): use sysinfo's internal CPU% by doing a double refresh.
+/// Collect all processes (non-Linux): optimized for reduced allocations and selective updates.
 #[cfg(not(target_os = "linux"))]
 pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
-    let ttl_ms: u64 = std::env::var("SOCKTOP_AGENT_PROCESSES_TTL_MS")
+    // Adaptive TTL based on system load
-        .ok()
+    let sys_guard = state.sys.lock().await;
-        .and_then(|v| v.parse().ok())
+    let load = sys_guard.global_cpu_usage();
-        .unwrap_or(2_000);
+    drop(sys_guard);
    let ttl_ms: u64 = if let Ok(v) = std::env::var("SOCKTOP_AGENT_PROCESSES_TTL_MS") {
        v.parse().unwrap_or(2_000)
    } else {
        // Adaptive TTL: longer when system is idle
        if load < 10.0 {
            4_000 // Light load
        } else if load < 30.0 {
            2_000 // Medium load
        } else {
            1_000 // High load
        }
    };
    let ttl = StdDuration::from_millis(ttl_ms);
    // Serve from cache if fresh
    {
        let cache = state.cache_processes.lock().await;
@ -428,49 +433,65 @@ pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
        }
    }
-    // Single refresh approach: rely on sysinfo's internal previous snapshot (so first call yields 0s, subsequent calls valid).
+    // Single efficient refresh: only update processes using significant CPU
    let (total_count, procs) = {
        let mut sys = state.sys.lock().await;
        let kind = ProcessRefreshKind::nothing().with_cpu().with_memory();
-        sys.refresh_processes_specifics(ProcessesToUpdate::All, false, kind);
+
-        sys.refresh_cpu_usage(); // update global so scaling comparison uses same interval
+        // Only refresh processes using >0.1% CPU
        sys.refresh_processes_specifics(
            ProcessesToUpdate::new().with_cpu_usage_higher_than(0.1),
            false,
            kind,
        );
        sys.refresh_cpu_usage();
        let total_count = sys.processes().len();
-        let norm = normalize_cpu_enabled();
+
-        let mut list: Vec<ProcessInfo> = sys
+        // Reuse allocations via process cache
-            .processes()
+        let mut proc_cache = state.proc_cache.lock().await;
-            .values()
+        proc_cache.reusable_vec.clear();
-            .map(|p| {
+
        // Filter and collect processes with meaningful CPU usage
        for p in sys.processes().values() {
            let raw = p.cpu_usage();
-                // Treat raw as share of total machine (0..100). Normalization flag currently just clamps.
+            if raw > 0.1 {
-                let cpu = if norm { raw.clamp(0.0, 100.0) } else { raw };
+                // Skip negligible CPU users
-                ProcessInfo {
+                let pid = p.pid().as_u32();
-                    pid: p.pid().as_u32(),
+
-                    name: p.name().to_string_lossy().into_owned(),
+                // Reuse cached name if available
-                    cpu_usage: cpu,
+                let name = if let Some(cached) = proc_cache.names.get(&pid) {
-                    mem_bytes: p.memory(),
+                    cached.clone()
-                }
+                } else {
-            })
+                    let new_name = p.name().to_string_lossy().into_owned();
-            .collect();
+                    proc_cache.names.insert(pid, new_name.clone());
-        // Optional global reconciliation: align sum of per-process CPU with global if significantly off (e.g. factor >1.2 or <0.8)
+                    new_name
        let sum: f32 = list.iter().map(|p| p.cpu_usage).sum();
        let global = sys.global_cpu_usage();
        if sum > 0.0 && global > 0.0 {
            let ratio = global / sum; // if <1, we are over-summing; if >1 under-summing
            if ratio < 0.8 || ratio > 1.2 {
                // scale gently toward global but not fully (to reduce jitter)
                let adj = (ratio * 0.5) + 0.5; // halfway to target
                for p in &mut list {
                    p.cpu_usage = (p.cpu_usage * adj).clamp(0.0, 100.0);
                }
            }
        }
        (total_count, list)
                };
                proc_cache.reusable_vec.push(ProcessInfo {
                    pid,
                    name,
                    cpu_usage: raw.clamp(0.0, 100.0),
                    mem_bytes: p.memory(),
                });
            }
        }
        // Clean up old process names periodically
        if total_count > proc_cache.names.len() + 100 {
            proc_cache
                .names
                .retain(|pid, _| sys.processes().contains_key(&sysinfo::Pid::from_u32(*pid)));
        }
        (total_count, proc_cache.reusable_vec.clone())
    };
    let payload = ProcessesPayload {
        process_count: total_count,
        top_processes: procs,
    };
    {
        let mut cache = state.cache_processes.lock().await;
        cache.set(payload.clone());
--- a/socktop_agent/src/metrics_new.rs
+++ b/socktop_agent/src/metrics_new.rs
@ -0,0 +1,96 @@
 /// Pick the process-snapshot TTL (milliseconds) for a given global CPU load.
 ///
 /// The snapshot is served longer when the machine is lightly loaded and refreshed
 /// more often when it is busy.
 #[cfg(not(target_os = "linux"))]
 fn adaptive_processes_ttl_ms(load: f32) -> u64 {
    if load < 10.0 {
        4_000 // Light load
    } else if load < 30.0 {
        2_000 // Medium load
    } else {
        1_000 // High load
    }
 }
 /// Collect all processes (non-Linux).
 ///
 /// Returns the cached payload while it is younger than the TTL. The TTL comes from
 /// `SOCKTOP_AGENT_PROCESSES_TTL_MS` when that variable is set (falling back to
 /// 2000 ms if it does not parse), otherwise it adapts to the last sampled global
 /// CPU usage. On a cache miss every process is refreshed once and only processes
 /// reporting more than 0.1% CPU are listed in `top_processes`; `process_count`
 /// still counts every process sysinfo knows about. CPU values are clamped to
 /// `0..=100`.
 #[cfg(not(target_os = "linux"))]
 pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
    // Only take the `sys` lock for the load sample when the TTL is not overridden.
    let ttl_ms: u64 = match std::env::var("SOCKTOP_AGENT_PROCESSES_TTL_MS") {
        Ok(v) => v.parse().unwrap_or(2_000),
        Err(_) => {
            // The guard is a temporary, so the lock is released at the end of the statement.
            let load = state.sys.lock().await.global_cpu_usage();
            adaptive_processes_ttl_ms(load)
        }
    };
    let ttl = StdDuration::from_millis(ttl_ms);
    // Serve from cache if fresh
    {
        let cache = state.cache_processes.lock().await;
        if cache.is_fresh(ttl) {
            if let Some(v) = cache.take_clone() {
                return v;
            }
        }
    }
    let (total_count, procs) = {
        let mut sys = state.sys.lock().await;
        let kind = ProcessRefreshKind::nothing().with_cpu().with_memory();
        // sysinfo can only select processes by PID (`ProcessesToUpdate::Some`) or
        // refresh all of them; CPU usage is not known until after a refresh, so
        // every process is refreshed and the filtering happens below.
        // NOTE(review): `false` keeps dead processes in sysinfo's map — confirm
        // another refresh path removes them.
        sys.refresh_processes_specifics(ProcessesToUpdate::All, false, kind);
        sys.refresh_cpu_usage();
        let live = sys.processes();
        let total_count = live.len();
        // Reuse allocations via process cache
        let mut proc_cache = state.proc_cache.lock().await;
        // Reborrow once so the two fields can be borrowed independently.
        let cache = &mut *proc_cache;
        cache.reusable_vec.clear();
        // Drop names of PIDs sysinfo no longer tracks before they are looked up.
        // This bounds the map and narrows the window in which a reused PID could
        // be reported under its previous owner's name.
        cache
            .names
            .retain(|pid, _| live.contains_key(&sysinfo::Pid::from_u32(*pid)));
        // Filter and collect processes with meaningful CPU usage
        for p in live.values() {
            let raw = p.cpu_usage();
            if raw > 0.1 {
                let pid = p.pid().as_u32();
                // Single lookup: reuse the cached name or compute and store it.
                let name = cache
                    .names
                    .entry(pid)
                    .or_insert_with(|| p.name().to_string_lossy().into_owned())
                    .clone();
                cache.reusable_vec.push(ProcessInfo {
                    pid,
                    name,
                    cpu_usage: raw.clamp(0.0, 100.0),
                    mem_bytes: p.memory(),
                });
            }
        }
        (total_count, cache.reusable_vec.clone())
    };
    let payload = ProcessesPayload {
        process_count: total_count,
        top_processes: procs,
    };
    {
        let mut cache = state.cache_processes.lock().await;
        cache.set(payload.clone());
    }
    payload
 }
--- a/socktop_agent/src/state.rs
+++ b/socktop_agent/src/state.rs
@ -20,6 +20,22 @@ pub struct ProcCpuTracker {
    pub last_per_pid: HashMap<u32, u64>,
 }
 /// Scratch state the non-Linux process collector keeps between scans.
 #[cfg(not(target_os = "linux"))]
 pub struct ProcessCache {
    /// Process names keyed by PID, filled in by the collector as it encounters processes.
    pub names: HashMap<u32, String>,
    /// Buffer the collector clears and refills on each scan, so its capacity is retained.
    pub reusable_vec: Vec<crate::types::ProcessInfo>,
 }
 #[cfg(not(target_os = "linux"))]
 impl Default for ProcessCache {
    /// Builds an empty cache with both containers pre-sized for 256 entries.
    fn default() -> Self {
        const INITIAL_CAPACITY: usize = 256;
        let names = HashMap::with_capacity(INITIAL_CAPACITY);
        let reusable_vec = Vec::with_capacity(INITIAL_CAPACITY);
        Self {
            names,
            reusable_vec,
        }
    }
 }
 #[derive(Clone)]
 pub struct AppState {
    pub sys: SharedSystem,
@ -32,6 +48,10 @@ pub struct AppState {
    #[cfg(target_os = "linux")]
    pub proc_cpu: Arc<Mutex<ProcCpuTracker>>,
    // Process name caching and vector reuse for non-Linux to reduce allocations
    #[cfg(not(target_os = "linux"))]
    pub proc_cache: Arc<Mutex<ProcessCache>>,
    // Connection tracking (to allow future idle sleeps if desired)
    pub client_count: Arc<AtomicUsize>,
@ -89,6 +109,8 @@ impl AppState {
            hostname: System::host_name().unwrap_or_else(|| "unknown".into()),
            #[cfg(target_os = "linux")]
            proc_cpu: Arc::new(Mutex::new(ProcCpuTracker::default())),
            #[cfg(not(target_os = "linux"))]
            proc_cache: Arc::new(Mutex::new(ProcessCache::default())),
            client_count: Arc::new(AtomicUsize::new(0)),
            auth_token: std::env::var("SOCKTOP_TOKEN")
                .ok()