macOS / non-Linux metrics optimizations.
This commit is contained in:
parent
2d17cf1598
commit
55e5c708fe
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -2187,7 +2187,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "socktop_agent"
|
name = "socktop_agent"
|
||||||
version = "1.40.6"
|
version = "1.40.61"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"assert_cmd",
|
"assert_cmd",
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "socktop_agent"
|
name = "socktop_agent"
|
||||||
version = "1.40.6"
|
version = "1.40.61"
|
||||||
authors = ["Jason Witty <jasonpwitty+socktop@proton.me>"]
|
authors = ["Jason Witty <jasonpwitty+socktop@proton.me>"]
|
||||||
description = "Remote system monitor over WebSocket, TUI like top"
|
description = "Remote system monitor over WebSocket, TUI like top"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|||||||
@ -16,16 +16,7 @@ use std::time::{Duration, Instant};
|
|||||||
use sysinfo::{ProcessRefreshKind, ProcessesToUpdate};
|
use sysinfo::{ProcessRefreshKind, ProcessesToUpdate};
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
||||||
// CPU normalization only relevant for non-Linux (Linux path uses /proc deltas fixed to 0..100 per process)
|
// NOTE: CPU normalization env removed; non-Linux now always reports per-process share (0..100) as given by sysinfo.
|
||||||
#[cfg(not(target_os = "linux"))]
|
|
||||||
fn normalize_cpu_enabled() -> bool {
|
|
||||||
static ON: OnceCell<bool> = OnceCell::new();
|
|
||||||
*ON.get_or_init(|| {
|
|
||||||
std::env::var("SOCKTOP_AGENT_NORMALIZE_CPU")
|
|
||||||
.map(|v| v != "0")
|
|
||||||
.unwrap_or(false)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
// Runtime toggles (read once)
|
// Runtime toggles (read once)
|
||||||
fn gpu_enabled() -> bool {
|
fn gpu_enabled() -> bool {
|
||||||
static ON: OnceCell<bool> = OnceCell::new();
|
static ON: OnceCell<bool> = OnceCell::new();
|
||||||
@ -410,14 +401,28 @@ pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
|
|||||||
payload
|
payload
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Collect all processes (non-Linux): use sysinfo's internal CPU% by doing a double refresh.
|
/// Collect all processes (non-Linux): optimized for reduced allocations and selective updates.
|
||||||
#[cfg(not(target_os = "linux"))]
|
#[cfg(not(target_os = "linux"))]
|
||||||
pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
|
pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
|
||||||
let ttl_ms: u64 = std::env::var("SOCKTOP_AGENT_PROCESSES_TTL_MS")
|
// Adaptive TTL based on system load
|
||||||
.ok()
|
let sys_guard = state.sys.lock().await;
|
||||||
.and_then(|v| v.parse().ok())
|
let load = sys_guard.global_cpu_usage();
|
||||||
.unwrap_or(2_000);
|
drop(sys_guard);
|
||||||
|
|
||||||
|
let ttl_ms: u64 = if let Ok(v) = std::env::var("SOCKTOP_AGENT_PROCESSES_TTL_MS") {
|
||||||
|
v.parse().unwrap_or(2_000)
|
||||||
|
} else {
|
||||||
|
// Adaptive TTL: longer when system is idle
|
||||||
|
if load < 10.0 {
|
||||||
|
4_000 // Light load
|
||||||
|
} else if load < 30.0 {
|
||||||
|
2_000 // Medium load
|
||||||
|
} else {
|
||||||
|
1_000 // High load
|
||||||
|
}
|
||||||
|
};
|
||||||
let ttl = StdDuration::from_millis(ttl_ms);
|
let ttl = StdDuration::from_millis(ttl_ms);
|
||||||
|
|
||||||
// Serve from cache if fresh
|
// Serve from cache if fresh
|
||||||
{
|
{
|
||||||
let cache = state.cache_processes.lock().await;
|
let cache = state.cache_processes.lock().await;
|
||||||
@ -428,49 +433,65 @@ pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Single refresh approach: rely on sysinfo's internal previous snapshot (so first call yields 0s, subsequent calls valid).
|
// Single efficient refresh: only update processes using significant CPU
|
||||||
let (total_count, procs) = {
|
let (total_count, procs) = {
|
||||||
let mut sys = state.sys.lock().await;
|
let mut sys = state.sys.lock().await;
|
||||||
let kind = ProcessRefreshKind::nothing().with_cpu().with_memory();
|
let kind = ProcessRefreshKind::nothing().with_cpu().with_memory();
|
||||||
sys.refresh_processes_specifics(ProcessesToUpdate::All, false, kind);
|
|
||||||
sys.refresh_cpu_usage(); // update global so scaling comparison uses same interval
|
// Only refresh processes using >0.1% CPU
|
||||||
|
sys.refresh_processes_specifics(
|
||||||
|
ProcessesToUpdate::new().with_cpu_usage_higher_than(0.1),
|
||||||
|
false,
|
||||||
|
kind,
|
||||||
|
);
|
||||||
|
sys.refresh_cpu_usage();
|
||||||
|
|
||||||
let total_count = sys.processes().len();
|
let total_count = sys.processes().len();
|
||||||
let norm = normalize_cpu_enabled();
|
|
||||||
let mut list: Vec<ProcessInfo> = sys
|
// Reuse allocations via process cache
|
||||||
.processes()
|
let mut proc_cache = state.proc_cache.lock().await;
|
||||||
.values()
|
proc_cache.reusable_vec.clear();
|
||||||
.map(|p| {
|
|
||||||
|
// Filter and collect processes with meaningful CPU usage
|
||||||
|
for p in sys.processes().values() {
|
||||||
let raw = p.cpu_usage();
|
let raw = p.cpu_usage();
|
||||||
// Treat raw as share of total machine (0..100). Normalization flag currently just clamps.
|
if raw > 0.1 {
|
||||||
let cpu = if norm { raw.clamp(0.0, 100.0) } else { raw };
|
// Skip negligible CPU users
|
||||||
ProcessInfo {
|
let pid = p.pid().as_u32();
|
||||||
pid: p.pid().as_u32(),
|
|
||||||
name: p.name().to_string_lossy().into_owned(),
|
// Reuse cached name if available
|
||||||
cpu_usage: cpu,
|
let name = if let Some(cached) = proc_cache.names.get(&pid) {
|
||||||
mem_bytes: p.memory(),
|
cached.clone()
|
||||||
}
|
} else {
|
||||||
})
|
let new_name = p.name().to_string_lossy().into_owned();
|
||||||
.collect();
|
proc_cache.names.insert(pid, new_name.clone());
|
||||||
// Optional global reconciliation: align sum of per-process CPU with global if significantly off (e.g. factor >1.2 or <0.8)
|
new_name
|
||||||
let sum: f32 = list.iter().map(|p| p.cpu_usage).sum();
|
|
||||||
let global = sys.global_cpu_usage();
|
|
||||||
if sum > 0.0 && global > 0.0 {
|
|
||||||
let ratio = global / sum; // if <1, we are over-summing; if >1 under-summing
|
|
||||||
if ratio < 0.8 || ratio > 1.2 {
|
|
||||||
// scale gently toward global but not fully (to reduce jitter)
|
|
||||||
let adj = (ratio * 0.5) + 0.5; // halfway to target
|
|
||||||
for p in &mut list {
|
|
||||||
p.cpu_usage = (p.cpu_usage * adj).clamp(0.0, 100.0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(total_count, list)
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
proc_cache.reusable_vec.push(ProcessInfo {
|
||||||
|
pid,
|
||||||
|
name,
|
||||||
|
cpu_usage: raw.clamp(0.0, 100.0),
|
||||||
|
mem_bytes: p.memory(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up old process names periodically
|
||||||
|
if total_count > proc_cache.names.len() + 100 {
|
||||||
|
proc_cache
|
||||||
|
.names
|
||||||
|
.retain(|pid, _| sys.processes().contains_key(&sysinfo::Pid::from_u32(*pid)));
|
||||||
|
}
|
||||||
|
|
||||||
|
(total_count, proc_cache.reusable_vec.clone())
|
||||||
|
};
|
||||||
|
|
||||||
let payload = ProcessesPayload {
|
let payload = ProcessesPayload {
|
||||||
process_count: total_count,
|
process_count: total_count,
|
||||||
top_processes: procs,
|
top_processes: procs,
|
||||||
};
|
};
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut cache = state.cache_processes.lock().await;
|
let mut cache = state.cache_processes.lock().await;
|
||||||
cache.set(payload.clone());
|
cache.set(payload.clone());
|
||||||
|
|||||||
96
socktop_agent/src/metrics_new.rs
Normal file
96
socktop_agent/src/metrics_new.rs
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
/// Collect all processes (non-Linux), serving the cached payload while it is still fresh.
///
/// The cache TTL is taken from `SOCKTOP_AGENT_PROCESSES_TTL_MS` when that variable is set
/// (falling back to 2000 ms if it does not parse as an integer). Otherwise it adapts to the
/// most recently sampled global CPU usage: 4000 ms below 10%, 2000 ms below 30%, and
/// 1000 ms above that.
///
/// On a cache miss every process is refreshed (CPU + memory only). Processes at or below
/// 0.1% CPU are left out of `top_processes`, while `process_count` still counts all
/// processes sysinfo tracks. Reported CPU usage is clamped to `0.0..=100.0`.
#[cfg(not(target_os = "linux"))]
pub async fn collect_processes_all(state: &AppState) -> ProcessesPayload {
    let ttl_ms: u64 = match std::env::var("SOCKTOP_AGENT_PROCESSES_TTL_MS") {
        Ok(raw) => raw.parse().unwrap_or(2_000),
        Err(_) => {
            // Only take the `sys` lock when the adaptive path actually needs the load figure;
            // the guard is a temporary and is released at the end of this statement.
            let load = state.sys.lock().await.global_cpu_usage();
            if load < 10.0 {
                4_000 // Light load
            } else if load < 30.0 {
                2_000 // Medium load
            } else {
                1_000 // High load
            }
        }
    };
    let ttl = StdDuration::from_millis(ttl_ms);

    // Serve from cache if fresh
    {
        let cache = state.cache_processes.lock().await;
        if cache.is_fresh(ttl) {
            if let Some(v) = cache.take_clone() {
                return v;
            }
        }
    }

    let (total_count, procs) = {
        let mut sys = state.sys.lock().await;
        let kind = ProcessRefreshKind::nothing().with_cpu().with_memory();

        // A process's CPU usage is only known after it has been refreshed, so the refresh
        // cannot be restricted to "busy" processes up front: refresh all of them and filter
        // the results below.
        // NOTE(review): `remove_dead_processes` is left at `false` as before; confirm that
        // exited processes are dropped somewhere else, otherwise `process_count` keeps growing.
        sys.refresh_processes_specifics(ProcessesToUpdate::All, false, kind);
        sys.refresh_cpu_usage();

        let total_count = sys.processes().len();

        let mut cache_guard = state.proc_cache.lock().await;
        // Reborrow once so `names` and `reusable_vec` can be borrowed independently.
        let cache = &mut *cache_guard;
        cache.reusable_vec.clear();

        for p in sys.processes().values() {
            let raw = p.cpu_usage();
            if raw > 0.1 {
                // Skip negligible CPU users
                let pid = p.pid().as_u32();

                // PIDs get recycled: trust the cached name only while it still matches the
                // live one, otherwise a new process would be reported under the old name.
                let live_name = p.name().to_string_lossy();
                let name = match cache.names.get(&pid) {
                    Some(cached) if cached.as_str() == &*live_name => cached.clone(),
                    _ => {
                        let owned = live_name.into_owned();
                        cache.names.insert(pid, owned.clone());
                        owned
                    }
                };

                cache.reusable_vec.push(ProcessInfo {
                    pid,
                    name,
                    cpu_usage: raw.clamp(0.0, 100.0),
                    mem_bytes: p.memory(),
                });
            }
        }

        // Drop cached names for PIDs sysinfo no longer tracks. This runs once per cache miss
        // and costs one hash lookup per cached name, so it is done unconditionally instead of
        // behind a size heuristic.
        cache
            .names
            .retain(|pid, _| sys.processes().contains_key(&sysinfo::Pid::from_u32(*pid)));

        (total_count, cache.reusable_vec.clone())
    };

    let payload = ProcessesPayload {
        process_count: total_count,
        top_processes: procs,
    };

    {
        let mut cache = state.cache_processes.lock().await;
        cache.set(payload.clone());
    }
    payload
}
|
||||||
@ -20,6 +20,22 @@ pub struct ProcCpuTracker {
|
|||||||
pub last_per_pid: HashMap<u32, u64>,
|
pub last_per_pid: HashMap<u32, u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Scratch state kept between process collections on non-Linux targets.
#[cfg(not(target_os = "linux"))]
pub struct ProcessCache {
    /// Process names keyed by PID.
    pub names: HashMap<u32, String>,
    /// Buffer of process entries whose allocation is kept between collections.
    pub reusable_vec: Vec<crate::types::ProcessInfo>,
}

#[cfg(not(target_os = "linux"))]
impl Default for ProcessCache {
    /// Builds an empty cache with room for 256 entries pre-allocated in both containers.
    fn default() -> Self {
        const INITIAL_CAPACITY: usize = 256;

        let names = HashMap::with_capacity(INITIAL_CAPACITY);
        let reusable_vec = Vec::with_capacity(INITIAL_CAPACITY);
        Self {
            names,
            reusable_vec,
        }
    }
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct AppState {
|
pub struct AppState {
|
||||||
pub sys: SharedSystem,
|
pub sys: SharedSystem,
|
||||||
@ -32,6 +48,10 @@ pub struct AppState {
|
|||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
pub proc_cpu: Arc<Mutex<ProcCpuTracker>>,
|
pub proc_cpu: Arc<Mutex<ProcCpuTracker>>,
|
||||||
|
|
||||||
|
// Process name caching and vector reuse for non-Linux to reduce allocations
|
||||||
|
#[cfg(not(target_os = "linux"))]
|
||||||
|
pub proc_cache: Arc<Mutex<ProcessCache>>,
|
||||||
|
|
||||||
// Connection tracking (to allow future idle sleeps if desired)
|
// Connection tracking (to allow future idle sleeps if desired)
|
||||||
pub client_count: Arc<AtomicUsize>,
|
pub client_count: Arc<AtomicUsize>,
|
||||||
|
|
||||||
@ -89,6 +109,8 @@ impl AppState {
|
|||||||
hostname: System::host_name().unwrap_or_else(|| "unknown".into()),
|
hostname: System::host_name().unwrap_or_else(|| "unknown".into()),
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
proc_cpu: Arc::new(Mutex::new(ProcCpuTracker::default())),
|
proc_cpu: Arc::new(Mutex::new(ProcCpuTracker::default())),
|
||||||
|
#[cfg(not(target_os = "linux"))]
|
||||||
|
proc_cache: Arc::new(Mutex::new(ProcessCache::default())),
|
||||||
client_count: Arc::new(AtomicUsize::new(0)),
|
client_count: Arc::new(AtomicUsize::new(0)),
|
||||||
auth_token: std::env::var("SOCKTOP_TOKEN")
|
auth_token: std::env::var("SOCKTOP_TOKEN")
|
||||||
.ok()
|
.ok()
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user