macOS / non-Linux metrics optimizations.

This commit is contained in:
jasonwitty 2025-08-27 16:00:29 -07:00
parent 2d17cf1598
commit 55e5c708fe
5 changed files with 186 additions and 47 deletions

2
Cargo.lock generated
View File

@ -2187,7 +2187,7 @@ dependencies = [
[[package]]
name = "socktop_agent"
-version = "1.40.6"
+version = "1.40.61"
dependencies = [
"anyhow",
"assert_cmd",

View File

@ -1,6 +1,6 @@
[package]
name = "socktop_agent"
-version = "1.40.6"
+version = "1.40.61"
authors = ["Jason Witty <jasonpwitty+socktop@proton.me>"]
description = "Remote system monitor over WebSocket, TUI like top"
edition = "2021"

View File

@ -16,16 +16,7 @@ use std::time::{Duration, Instant};
use sysinfo::{ProcessRefreshKind, ProcessesToUpdate};
use tracing::warn;
// CPU normalization only relevant for non-Linux (Linux path uses /proc deltas fixed to 0..100 per process)
#[cfg(not(target_os = "linux"))]
fn normalize_cpu_enabled() -> bool {
static ON: OnceCell<bool> = OnceCell::new();
*ON.get_or_init(|| {
std::env::var("SOCKTOP_AGENT_NORMALIZE_CPU")
.map(|v| v != "0")
.unwrap_or(false)
})
}
// NOTE: CPU normalization env removed; non-Linux now always reports per-process share (0..100) as given by sysinfo.
// Runtime toggles (read once)
fn gpu_enabled() -> bool {
static ON: OnceCell<bool> = OnceCell::new();
@ -410,14 +401,28 @@ pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
payload
}
/// Collect all processes (non-Linux): use sysinfo's internal CPU% by doing a double refresh.
/// Collect all processes (non-Linux): optimized for reduced allocations and selective updates.
#[cfg(not(target_os = "linux"))]
pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
let ttl_ms: u64 = std::env::var("SOCKTOP_AGENT_PROCESSES_TTL_MS")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(2_000);
// Adaptive TTL based on system load
let sys_guard = state.sys.lock().await;
let load = sys_guard.global_cpu_usage();
drop(sys_guard);
let ttl_ms: u64 = if let Ok(v) = std::env::var("SOCKTOP_AGENT_PROCESSES_TTL_MS") {
v.parse().unwrap_or(2_000)
} else {
// Adaptive TTL: longer when system is idle
if load < 10.0 {
4_000 // Light load
} else if load < 30.0 {
2_000 // Medium load
} else {
1_000 // High load
}
};
let ttl = StdDuration::from_millis(ttl_ms);
// Serve from cache if fresh
{
let cache = state.cache_processes.lock().await;
@ -428,49 +433,65 @@ pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
}
}
// Single refresh approach: rely on sysinfo's internal previous snapshot (so first call yields 0s, subsequent calls valid).
// Single efficient refresh: only update processes using significant CPU
let (total_count, procs) = {
let mut sys = state.sys.lock().await;
let kind = ProcessRefreshKind::nothing().with_cpu().with_memory();
sys.refresh_processes_specifics(ProcessesToUpdate::All, false, kind);
sys.refresh_cpu_usage(); // update global so scaling comparison uses same interval
// Only refresh processes using >0.1% CPU
sys.refresh_processes_specifics(
ProcessesToUpdate::new().with_cpu_usage_higher_than(0.1),
false,
kind,
);
sys.refresh_cpu_usage();
let total_count = sys.processes().len();
let norm = normalize_cpu_enabled();
let mut list: Vec<ProcessInfo> = sys
.processes()
.values()
.map(|p| {
let raw = p.cpu_usage();
// Treat raw as share of total machine (0..100). Normalization flag currently just clamps.
let cpu = if norm { raw.clamp(0.0, 100.0) } else { raw };
ProcessInfo {
pid: p.pid().as_u32(),
name: p.name().to_string_lossy().into_owned(),
cpu_usage: cpu,
// Reuse allocations via process cache
let mut proc_cache = state.proc_cache.lock().await;
proc_cache.reusable_vec.clear();
// Filter and collect processes with meaningful CPU usage
for p in sys.processes().values() {
let raw = p.cpu_usage();
if raw > 0.1 {
// Skip negligible CPU users
let pid = p.pid().as_u32();
// Reuse cached name if available
let name = if let Some(cached) = proc_cache.names.get(&pid) {
cached.clone()
} else {
let new_name = p.name().to_string_lossy().into_owned();
proc_cache.names.insert(pid, new_name.clone());
new_name
};
proc_cache.reusable_vec.push(ProcessInfo {
pid,
name,
cpu_usage: raw.clamp(0.0, 100.0),
mem_bytes: p.memory(),
}
})
.collect();
// Optional global reconciliation: align sum of per-process CPU with global if significantly off (e.g. factor >1.2 or <0.8)
let sum: f32 = list.iter().map(|p| p.cpu_usage).sum();
let global = sys.global_cpu_usage();
if sum > 0.0 && global > 0.0 {
let ratio = global / sum; // if <1, we are over-summing; if >1 under-summing
if ratio < 0.8 || ratio > 1.2 {
// scale gently toward global but not fully (to reduce jitter)
let adj = (ratio * 0.5) + 0.5; // halfway to target
for p in &mut list {
p.cpu_usage = (p.cpu_usage * adj).clamp(0.0, 100.0);
}
});
}
}
(total_count, list)
// Clean up old process names periodically
if total_count > proc_cache.names.len() + 100 {
proc_cache
.names
.retain(|pid, _| sys.processes().contains_key(&sysinfo::Pid::from_u32(*pid)));
}
(total_count, proc_cache.reusable_vec.clone())
};
let payload = ProcessesPayload {
process_count: total_count,
top_processes: procs,
};
{
let mut cache = state.cache_processes.lock().await;
cache.set(payload.clone());

View File

@ -0,0 +1,96 @@
/// Collect all processes (non-Linux): optimized for reduced allocations and selective updates.
///
/// Strategy:
/// * Adaptive cache TTL — when `SOCKTOP_AGENT_PROCESSES_TTL_MS` is unset, the
///   refresh interval stretches while the system is idle and tightens under load.
/// * Allocation reuse — process names and the result vector live in
///   `state.proc_cache`, so steady-state collection does not reallocate.
/// * Post-refresh filtering — only processes above a negligible CPU share
///   (>0.1%) are included in `top_processes`; `process_count` still reflects
///   the full process table.
#[cfg(not(target_os = "linux"))]
pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
    // TTL: an explicit env override wins; otherwise adapt to current global CPU
    // load. Reading the load needs the sys lock, so only take it when adapting
    // (the original locked sys unconditionally, even when the override was set).
    let ttl_ms: u64 = if let Ok(v) = std::env::var("SOCKTOP_AGENT_PROCESSES_TTL_MS") {
        v.parse().unwrap_or(2_000)
    } else {
        let load = {
            let sys_guard = state.sys.lock().await;
            sys_guard.global_cpu_usage()
        };
        // Adaptive TTL: longer when system is idle
        if load < 10.0 {
            4_000 // Light load
        } else if load < 30.0 {
            2_000 // Medium load
        } else {
            1_000 // High load
        }
    };
    let ttl = StdDuration::from_millis(ttl_ms);
    // Serve from cache if fresh
    {
        let cache = state.cache_processes.lock().await;
        if cache.is_fresh(ttl) {
            if let Some(v) = cache.take_clone() {
                return v;
            }
        }
    }
    // Single refresh pass; per-process CPU% is the delta against sysinfo's
    // previous snapshot (first call after startup therefore reports ~0).
    let (total_count, procs) = {
        let mut sys = state.sys.lock().await;
        let kind = ProcessRefreshKind::nothing().with_cpu().with_memory();
        // NOTE(review): sysinfo's `ProcessesToUpdate` is an enum (`All` /
        // `Some(&[Pid])`); there is no CPU-threshold builder, and selective
        // refresh would break the CPU delta for unrefreshed pids anyway.
        // Refresh everything and filter below.
        sys.refresh_processes_specifics(ProcessesToUpdate::All, false, kind);
        sys.refresh_cpu_usage(); // keep global CPU on the same sampling interval
        let total_count = sys.processes().len();
        // Reuse allocations via process cache
        let mut proc_cache = state.proc_cache.lock().await;
        proc_cache.reusable_vec.clear();
        // Filter and collect processes with meaningful CPU usage (>0.1%)
        for p in sys.processes().values() {
            let raw = p.cpu_usage();
            if raw > 0.1 {
                let pid = p.pid().as_u32();
                // Reuse cached name if available (avoids the OsStr -> String
                // lossy conversion/allocation on every poll).
                let name = if let Some(cached) = proc_cache.names.get(&pid) {
                    cached.clone()
                } else {
                    let new_name = p.name().to_string_lossy().into_owned();
                    proc_cache.names.insert(pid, new_name.clone());
                    new_name
                };
                proc_cache.reusable_vec.push(ProcessInfo {
                    pid,
                    name,
                    cpu_usage: raw.clamp(0.0, 100.0),
                    mem_bytes: p.memory(),
                });
            }
        }
        // Evict names of exited pids once the cache clearly outgrows the live
        // process table. (The original compared the other way around —
        // `total_count > names.len() + 100` — which only fires when the cache
        // is too SMALL, so stale entries leaked indefinitely as pids churned.)
        if proc_cache.names.len() > total_count + 100 {
            proc_cache
                .names
                .retain(|pid, _| sys.processes().contains_key(&sysinfo::Pid::from_u32(*pid)));
        }
        // Clone the reusable buffer for the payload; the buffer itself (and its
        // capacity) stays in the cache for the next pass.
        (total_count, proc_cache.reusable_vec.clone())
    };
    let payload = ProcessesPayload {
        process_count: total_count,
        top_processes: procs,
    };
    {
        let mut cache = state.cache_processes.lock().await;
        cache.set(payload.clone());
    }
    payload
}

View File

@ -20,6 +20,22 @@ pub struct ProcCpuTracker {
pub last_per_pid: HashMap<u32, u64>,
}
/// Scratch state for the non-Linux process-collection path, kept across polls
/// to avoid re-allocating on every collection.
#[cfg(not(target_os = "linux"))]
pub struct ProcessCache {
    // pid -> process name; cached because converting the OS name to a String
    // allocates each time it is fetched.
    pub names: HashMap<u32, String>,
    // Result buffer reused across collections; cleared (capacity retained)
    // at the start of each pass.
    pub reusable_vec: Vec<crate::types::ProcessInfo>,
}
/// Pre-sizes both collections so a typical process table (a few hundred
/// entries) does not force a rehash or regrow during the first pass.
#[cfg(not(target_os = "linux"))]
impl Default for ProcessCache {
    fn default() -> Self {
        const INITIAL_CAPACITY: usize = 256;
        ProcessCache {
            reusable_vec: Vec::with_capacity(INITIAL_CAPACITY),
            names: HashMap::with_capacity(INITIAL_CAPACITY),
        }
    }
}
#[derive(Clone)]
pub struct AppState {
pub sys: SharedSystem,
@ -32,6 +48,10 @@ pub struct AppState {
#[cfg(target_os = "linux")]
pub proc_cpu: Arc<Mutex<ProcCpuTracker>>,
// Process name caching and vector reuse for non-Linux to reduce allocations
#[cfg(not(target_os = "linux"))]
pub proc_cache: Arc<Mutex<ProcessCache>>,
// Connection tracking (to allow future idle sleeps if desired)
pub client_count: Arc<AtomicUsize>,
@ -89,6 +109,8 @@ impl AppState {
hostname: System::host_name().unwrap_or_else(|| "unknown".into()),
#[cfg(target_os = "linux")]
proc_cpu: Arc::new(Mutex::new(ProcCpuTracker::default())),
#[cfg(not(target_os = "linux"))]
proc_cache: Arc::new(Mutex::new(ProcessCache::default())),
client_count: Arc::new(AtomicUsize::new(0)),
auth_token: std::env::var("SOCKTOP_TOKEN")
.ok()