Skip to content

Commit

Permalink
feat(exporter): add process info
Browse files (browse the repository at this point in the history)
  • Loading branch information
XuehaiPan committed Dec 28, 2024
1 parent 25b6d61 commit df1bfb1
Showing 1 changed file with 23 additions and 2 deletions.
25 changes: 23 additions & 2 deletions nvitop-exporter/nvitop_exporter/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,12 @@ def __init__( # pylint: disable=too-many-statements
)

# Create the info metric and gauges for process metrics
self.process_info = Info(
name='process_info',
documentation='Process information.',
labelnames=['hostname', 'index', 'devicename', 'uuid', 'pid', 'username'],
registry=self.registry,
)
self.process_running_time = Gauge(
name='process_running_time',
documentation='Process running time (s).',
Expand Down Expand Up @@ -592,11 +598,25 @@ def update_device(self, device: Device) -> None: # pylint: disable=too-many-loc
alive_pids.clear()

with GpuProcess.failsafe():
process_info = {}
for pid, process in device.processes().items():
with process.oneshot():
username = process.username()
running_time = process.running_time()
alive_pids.add((pid, username))
if (pid, username) not in process_info:
process_info[(pid, username)] = {
'status': process.status(),
'command': process.command(),
}
self.process_info.labels(
hostname=self.hostname,
index=index,
devicename=name,
uuid=uuid,
pid=pid,
username=username,
).info(process_info[(pid, username)])
for gauge, value in (
(
self.process_running_time,
Expand Down Expand Up @@ -633,7 +653,8 @@ def update_device(self, device: Device) -> None: # pylint: disable=too-many-loc
).set(value)

for pid, username in previous_alive_pids.difference(alive_pids):
for gauge in (
for collector in (
self.process_info,
self.process_running_time,
self.process_cpu_percent,
self.process_rss_memory,
Expand All @@ -645,7 +666,7 @@ def update_device(self, device: Device) -> None: # pylint: disable=too-many-loc
self.process_gpu_decoder_utilization,
):
try:
gauge.remove(
collector.remove(
self.hostname,
index,
name,
Expand Down

0 comments on commit df1bfb1

Please sign in to comment.