jeol-t330a/succbone/succd/process.go
Rahix c031fa5a43
All checks were successful
/ test (push) Successful in 10s
/ test (pull_request) Successful in 10s
succd: Add metrics for RP and DP operating time
Add counter metrics that count the total operating time for the roughing
pump and the diffusion pump.
2024-10-07 07:42:42 +02:00

210 lines
5.4 KiB
Go

package main
import (
"context"
"errors"
"fmt"
"sync"
"sync/atomic"
"time"
"k8s.io/klog"
)
// daemon is the main service of the succdaemon. It holds the hardware handles
// (one ADC and a set of GPIOs) together with the mutable daemonState that the
// processing loop updates on every iteration.
type daemon struct {
// adcPirani is the adc implementation returning the voltage of the Pfeiffer
// Pirani gauge.
adcPirani adc
// The GPIOs below are the relay outputs written by processOnce on every
// iteration: the two pumps, the pump-down and vent outputs, and the
// rough / high vacuum feedback outputs.
gpioDiffusionPump gpio
gpioRoughingPump gpio
gpioBtnPumpDown gpio
gpioBtnVent gpio
gpioBelowRough gpio
gpioBelowHigh gpio
// load is the time taken by the most recent processing iteration, expressed
// as a percentage of the loop period. It is stored by process.
load atomic.Int64
// mu guards the state below.
mu sync.RWMutex
daemonState
}
// daemonState contains all the state of the daemon. A copy of it can be
// requested for consumers, eg. the web view.
type daemonState struct {
// safety holds the latched safety conditions evaluated by processOnce from
// the small (3) pirani ringbuffer.
safety struct {
// failsafe mode is enabled when the pirani gauge appears to be
// disconnected, and is disabled only when an atmosphere is read.
failsafe bool
// highPressure mode is enabled when the pressure reading is above 1e-1
// mbar, locking out the diffusion pump from being enabled.
highPressure bool
}
// piraniVolts100 / piraniMbar100 are fed with every ADC reading and drive the
// aboveRough / aboveHigh threshold outputs once the ringbuffer is saturated.
// NOTE(review): the "100" presumably is the ringbuffer length in samples —
// confirm against the constructor.
piraniVolts100 ringbufferInput
piraniMbar100 pfeifferVoltsToMbar
// piraniVolts3 / piraniMbar3 are fed with the same readings and are the basis
// for the safety checks (failsafe and highPressure).
piraniVolts3 ringbufferInput
piraniMbar3 pfeifferVoltsToMbar
// rpOn and dpOn are the requested states of the roughing pump and diffusion
// pump outputs. processOnce forces dpOn to false while failsafe or
// highPressure is active.
rpOn bool
dpOn bool
// rpOperatingTime and dpOperatingTime accumulate the elapsed time of every
// processing iteration during which rpOn / dpOn was set.
rpOperatingTime time.Duration
dpOperatingTime time.Duration
// vent and pumpdown are processed on every iteration; their outputs are
// written to the vent and pump-down GPIOs.
vent momentaryOutput
pumpdown momentaryOutput
// aboveRough and aboveHigh are threshold outputs fed with the pressure from
// the large (100) ringbuffer. Their outputs are forced to true while that
// ringbuffer is not saturated or while failsafe mode is active.
aboveRough thresholdOutput
aboveHigh thresholdOutput
}
// vacuumStatus reports whether rough and high vacuum are currently reached.
// Each result is the logical negation of the corresponding above-threshold
// output, so it is false whenever that output is set (including when the
// output has been forced on by the processing loop).
func (d *daemonState) vacuumStatus() (rough, high bool) {
	return !d.aboveRough.output, !d.aboveHigh.output
}
// process runs the main acquisition and control loop of succd.
//
// It invokes processOnce at a fixed rate of 100 Hz until ctx is canceled,
// passing the wall-clock time elapsed since the previous iteration (zero on
// the very first one). After each iteration the time spent is published in
// d.load as a percentage of the loop period; a backoff after a failed
// iteration is included in that measurement.
//
// If an iteration fails with anything other than the context's own error, the
// error is logged and the loop backs off for 10 seconds before retrying. The
// backoff is abandoned as soon as ctx is canceled, so a failing iteration
// never delays shutdown.
func (d *daemon) process(ctx context.Context) {
	const (
		// hz is the loop frequency.
		hz = 100
		// errorBackoff is how long the loop pauses after a failed iteration.
		errorBackoff = 10 * time.Second
	)
	period := time.Second / time.Duration(hz)
	// Extra grace period for GC pauses and other non-realtime system jitter.
	periodGrace := period + 5*time.Millisecond

	ticker := time.NewTicker(period)
	defer ticker.Stop()

	var lastRun time.Time
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
		}

		now := time.Now()
		// elapsed stays zero on the first iteration, when there is no
		// previous run to measure against.
		var elapsed time.Duration
		if !lastRun.IsZero() {
			elapsed = now.Sub(lastRun)
			if elapsed > periodGrace {
				klog.Warningf("Processing loop lag: took %s, want %s", elapsed, period)
			}
		}
		lastRun = now

		if err := d.processOnce(ctx, elapsed); err != nil {
			// An error caused by our own cancellation means we are done.
			if ctxErr := ctx.Err(); ctxErr != nil && errors.Is(err, ctxErr) {
				return
			}
			klog.Errorf("Processing error: %v", err)

			// Back off before retrying, but stay responsive to cancellation
			// instead of blocking in an uninterruptible sleep.
			backoff := time.NewTimer(errorBackoff)
			select {
			case <-backoff.C:
			case <-ctx.Done():
				backoff.Stop()
				return
			}
		}

		runtime := time.Since(lastRun)
		d.load.Store(int64(100 * runtime / period))
	}
}
// processOnce runs a single step of the main loop of succd.
//
// It reads the pirani ADC, feeds both ringbuffers, evaluates the safety
// conditions and threshold outputs, writes every relay GPIO and finally
// accounts elapsed towards the pump operating-time counters. elapsed is the
// time since the previous step; non-positive values are not accounted.
//
// An error is returned if the ADC cannot be read or a GPIO cannot be set. In
// the latter case the remaining GPIOs are not written and the operating-time
// counters are not updated for this step.
func (d *daemon) processOnce(_ context.Context, elapsed time.Duration) error {
	// Pressure limits (in mbar) of the two safety hysteresis loops.
	const (
		failsafeEnterMbar     = 4e-6
		failsafeExitMbar      = 1e2
		highPressureEnterMbar = 1e-1
		highPressureExitMbar  = (1e-1) - (1e-2)
	)

	volts, err := d.adcPirani.Read()
	if err != nil {
		return fmt.Errorf("when reading ADC: %w", err)
	}

	d.mu.Lock()
	defer d.mu.Unlock()

	// Feed the new sample into both pirani ringbuffers.
	d.piraniVolts3.process(volts)
	d.piraniMbar3.process(d.piraniVolts3.avg)
	d.piraniVolts100.process(volts)
	d.piraniMbar100.process(d.piraniVolts100.avg)

	d.pumpdown.process()
	d.vent.process()

	// Safety checks use the small ringbuffer so that they react quickly.
	if !d.piraniVolts3.saturated() {
		// Not enough data yet: keep the diffusion pump locked out.
		d.safety.highPressure = true
	} else {
		fast := d.piraniMbar3.mbar

		switch {
		case !d.safety.failsafe && fast < failsafeEnterMbar:
			// Unrealistic result, Pirani probe probably disconnected.
			d.safety.failsafe = true
			klog.Errorf("SAFETY: Pirani probe seems disconnected; enabling failsafe mode")
		case d.safety.failsafe && fast > failsafeExitMbar:
			d.safety.failsafe = false
			klog.Infof("SAFETY: Pirani probe value (%s) is plausible again; quitting failsafe mode", formatMbar(fast))
		}

		switch {
		case !d.safety.highPressure && fast >= highPressureEnterMbar:
			d.safety.highPressure = true
			klog.Warningf("SAFETY: Pressure is too high (%s mbar); enabling diffusion pump lockout", formatMbar(fast))
		case d.safety.highPressure && fast < highPressureExitMbar:
			d.safety.highPressure = false
			klog.Infof("SAFETY: Pressure is low enough (%s mbar) for diffusion pump operation; quitting diffusion pump lockout", formatMbar(fast))
		}
	}

	// Threshold/feedback values come from the main pirani ringbuffer and fail
	// safe (report "above") while it does not hold enough data.
	if d.piraniVolts100.saturated() {
		slow := float64(d.piraniMbar100.mbar)
		d.aboveRough.process(slow)
		d.aboveHigh.process(slow)
	} else {
		d.aboveRough.output = true
		d.aboveHigh.output = true
	}

	// Safety overrides take precedence over everything computed above.
	if d.safety.failsafe {
		d.aboveRough.output = true
		d.aboveHigh.output = true
	}
	if d.safety.failsafe || d.safety.highPressure {
		d.dpOn = false
	}

	// relayOutput describes one relay GPIO and the value to drive it with.
	type relayOutput struct {
		name string
		pin  gpio
		// activeHigh means the relay is active high, ie. a true state will
		// mean that NO/COM get connected, and a false state means that NC/COM
		// get connected.
		activeHigh bool
		state      bool
	}
	outputs := [...]relayOutput{
		{name: "rp", pin: d.gpioRoughingPump, activeHigh: false, state: d.rpOn},
		{name: "dp", pin: d.gpioDiffusionPump, activeHigh: true, state: d.dpOn},
		{name: "pumpdown", pin: d.gpioBtnPumpDown, activeHigh: true, state: d.pumpdown.output},
		{name: "vent", pin: d.gpioBtnVent, activeHigh: true, state: d.vent.output},
		{name: "rough", pin: d.gpioBelowRough, activeHigh: false, state: d.aboveRough.output},
		{name: "high", pin: d.gpioBelowHigh, activeHigh: false, state: d.aboveHigh.output},
	}
	for _, out := range outputs {
		// The relays go through logical inversion (ie. a GPIO false is a
		// relay trigger), so active-high relays get the inverted state. This
		// is an XOR of state and activeHigh.
		level := out.state != out.activeHigh
		if err := out.pin.set(level); err != nil {
			return fmt.Errorf("when outputting %s: %w", out.name, err)
		}
	}

	// Account the elapsed time to whichever pumps are running.
	if elapsed > 0 {
		if d.rpOn {
			d.rpOperatingTime += elapsed
		}
		if d.dpOn {
			d.dpOperatingTime += elapsed
		}
	}
	return nil
}