Compare commits

...

2 commits

Author SHA1 Message Date
Rahix c031fa5a43 succd: Add metrics for RP and DP operating time
All checks were successful
/ test (push) Successful in 10s
/ test (pull_request) Successful in 10s
Add counter metrics that count the total operating time for the roughing
pump and the diffusion pump.
2024-10-07 07:42:42 +02:00
Rahix 0aba323779 succd: Export all process values as prometheus metrics
All checks were successful
/ test (pull_request) Successful in 10s
/ test (push) Successful in 9s
For more detailed monitoring, let's export all process values that are
exposed to the web API as prometheus metrics.
2024-10-07 07:42:42 +02:00
2 changed files with 73 additions and 6 deletions

View file

@ -163,16 +163,68 @@ func (s *webServer) viewStream(w http.ResponseWriter, r *http.Request) {
}
}
// boolToFloat converts b to a numeric value: 1.0 for true and 0.0 for false.
// It is used to export boolean state as a numeric metric sample.
func boolToFloat(b bool) float32 {
	if b {
		return 1.0
	}
	return 0.0
}
// viewMetrics serves minimalistic Prometheus-compatible metrics.
//
// It takes one snapshot of the daemon state and writes each exported value to
// w as a HELP line, a TYPE line and a sample line. Boolean values are
// converted with boolToFloat before formatting. The return values of
// fmt.Fprintf (including write errors) are not checked.
func (s *webServer) viewMetrics(w http.ResponseWriter, r *http.Request) {
// TODO(q3k): also serve Go stuff using the actual Prometheus metrics client
// library.
// TODO(q3k): serve the rest of the data model
state := s.d.snapshot()
mbar := state.piraniMbar100.mbar
// sem_pressure_mbar is meant to represent the fused pressure value
// from all data sources once we have more vacuum sensors in the
// system. sem_pirani_mbar is just the reading from the pirani gauge.
fmt.Fprintf(w, "# HELP sem_pressure_mbar Pressure in the SEM chamber, in millibar\n")
fmt.Fprintf(w, "# TYPE sem_pressure_mbar gauge\n")
// NOTE(review): the next two lines both emit a sem_pressure_mbar sample with
// the same value (mbar is state.piraniMbar100.mbar). This looks like the old
// and new form of one line shown together; confirm only one is meant to remain.
fmt.Fprintf(w, "sem_pressure_mbar %f\n", mbar)
fmt.Fprintf(w, "sem_pressure_mbar %f\n", state.piraniMbar100.mbar)
fmt.Fprintf(w, "# HELP sem_pirani_mbar Pressure reading by the Pirani gauge, in millibar\n")
fmt.Fprintf(w, "# TYPE sem_pirani_mbar gauge\n")
fmt.Fprintf(w, "sem_pirani_mbar %f\n", state.piraniMbar100.mbar)
fmt.Fprintf(w, "# HELP sem_pirani_volts Voltage output from the Pirani gauge, in volts\n")
fmt.Fprintf(w, "# TYPE sem_pirani_volts gauge\n")
fmt.Fprintf(w, "sem_pirani_volts %f\n", state.piraniVolts100.avg)
// Safety flags, exported as gauges via boolToFloat.
fmt.Fprintf(w, "# HELP sem_pirani_failsafe_active Whether pirani gauge failsafe mode is active (boolean)\n")
fmt.Fprintf(w, "# TYPE sem_pirani_failsafe_active gauge\n")
fmt.Fprintf(w, "sem_pirani_failsafe_active %f\n", boolToFloat(state.safety.failsafe))
// The diffusion pump lockout metric is sourced from state.safety.highPressure.
fmt.Fprintf(w, "# HELP sem_dp_lockout_active Whether diffusion pump lockout is active (boolean)\n")
fmt.Fprintf(w, "# TYPE sem_dp_lockout_active gauge\n")
fmt.Fprintf(w, "sem_dp_lockout_active %f\n", boolToFloat(state.safety.highPressure))
// Pump on/off state.
fmt.Fprintf(w, "# HELP sem_pump_diffusion_running Whether the diffusion pump is running (boolean)\n")
fmt.Fprintf(w, "# TYPE sem_pump_diffusion_running gauge\n")
fmt.Fprintf(w, "sem_pump_diffusion_running %f\n", boolToFloat(state.dpOn))
fmt.Fprintf(w, "# HELP sem_pump_roughing_running Whether the roughing pump is running (boolean)\n")
fmt.Fprintf(w, "# TYPE sem_pump_roughing_running gauge\n")
fmt.Fprintf(w, "sem_pump_roughing_running %f\n", boolToFloat(state.rpOn))
// Vacuum status flags as reported by the snapshot's vacuumStatus method.
rough, high := state.vacuumStatus()
fmt.Fprintf(w, "# HELP sem_vacuum_rough_reached Whether a rough vacuum has been reached (boolean)\n")
fmt.Fprintf(w, "# TYPE sem_vacuum_rough_reached gauge\n")
fmt.Fprintf(w, "sem_vacuum_rough_reached %f\n", boolToFloat(rough))
fmt.Fprintf(w, "# HELP sem_vacuum_high_reached Whether a high vacuum has been reached (boolean)\n")
fmt.Fprintf(w, "# TYPE sem_vacuum_high_reached gauge\n")
fmt.Fprintf(w, "sem_vacuum_high_reached %f\n", boolToFloat(high))
// Operating time totals, declared as counters and reported in seconds.
// NOTE(review): presumably these restart from zero when the daemon restarts;
// no persistence is visible here, so confirm.
fmt.Fprintf(w, "# HELP sem_rp_operating_seconds_total Operating time of the roughing pump, in seconds\n")
fmt.Fprintf(w, "# TYPE sem_rp_operating_seconds_total counter\n")
fmt.Fprintf(w, "sem_rp_operating_seconds_total %f\n", state.rpOperatingTime.Seconds())
fmt.Fprintf(w, "# HELP sem_dp_operating_seconds_total Operating time of the diffusion pump, in seconds\n")
fmt.Fprintf(w, "# TYPE sem_dp_operating_seconds_total counter\n")
fmt.Fprintf(w, "sem_dp_operating_seconds_total %f\n", state.dpOperatingTime.Seconds())
}
func (s *webServer) viewRoughingPumpEnable(w http.ResponseWriter, r *http.Request) {

View file

@ -51,6 +51,9 @@ type daemonState struct {
rpOn bool
dpOn bool
rpOperatingTime time.Duration
dpOperatingTime time.Duration
vent momentaryOutput
pumpdown momentaryOutput
aboveRough thresholdOutput
@ -77,11 +80,15 @@ func (d *daemon) process(ctx context.Context) {
select {
case <-ticker.C:
now := time.Now()
if elapsed := now.Sub(lastRun); !lastRun.IsZero() && elapsed > periodGrace {
klog.Warningf("Processing loop lag: took %s, want %s", elapsed, period)
var elapsed time.Duration = 0
if !lastRun.IsZero() {
elapsed = now.Sub(lastRun)
if elapsed > periodGrace {
klog.Warningf("Processing loop lag: took %s, want %s", elapsed, period)
}
}
lastRun = now
if err := d.processOnce(ctx); err != nil {
if err := d.processOnce(ctx, elapsed); err != nil {
if errors.Is(err, ctx.Err()) {
return
} else {
@ -99,7 +106,7 @@ func (d *daemon) process(ctx context.Context) {
}
// processOnce runs the main loop step of succd.
func (d *daemon) processOnce(_ context.Context) error {
func (d *daemon) processOnce(_ context.Context, elapsed time.Duration) error {
v, err := d.adcPirani.Read()
if err != nil {
return fmt.Errorf("when reading ADC: %w", err)
@ -190,5 +197,13 @@ func (d *daemon) processOnce(_ context.Context) error {
}
}
// Update operating time counters
if d.rpOn && elapsed > 0 {
d.rpOperatingTime += elapsed
}
if d.dpOn && elapsed > 0 {
d.dpOperatingTime += elapsed
}
return nil
}