test(meter): add fault-count regression test for meter diagnosis

This commit is contained in:
2026-03-16 16:32:21 +01:00
parent 3e9259735e
commit 99aae76404

View File

@@ -0,0 +1,301 @@
/**
* @file test_meter_fault_count.cpp
* @brief Unit test: verifies that the meter fault counter increments once per
* stale-data event, NOT once per catch-up tick.
*
* Regression test for the ~200 errors/hour bug where LoRa TX blocking caused
* the sampling catch-up loop to fire note_fault() for every missed 1s tick.
*
* Run on target with: pio test -e lilygo-t3-v1-6-1-test -f test_meter_fault_count
*/
#include <Arduino.h>
#include <unity.h>
#include "data_model.h"
// ---------- Minimal stubs replicating the fixed fault-counting logic ----------
// File-scope fault state. Within this file it is only written by
// reset_test_faults(); the test cases keep their own local counters.
static FaultCounters test_faults{};
static FaultType test_last_error{FaultType::None};
static uint32_t test_last_error_utc{0};
static uint32_t test_last_error_ms{0};
/**
 * Minimal fault-bookkeeping stub used by the sampling simulations in this file.
 *
 * Increments the counter that matches @p type and records the fault as the
 * most recent one. Fault types other than MeterRead, Decode and LoraTx do not
 * touch any counter but are still recorded as the last fault.
 *
 * @param counters     Fault counters to update (in/out).
 * @param last_type    Receives @p type (out).
 * @param last_ts_utc  Receives the fault time in whole seconds (out). NOTE: this
 *                     stub derives the value from millis(), so it is seconds
 *                     since boot rather than wall-clock UTC.
 * @param last_ts_ms   Receives the millis() reading taken for this fault (out).
 * @param type         Kind of fault being reported.
 */
static void note_fault_stub(FaultCounters &counters, FaultType &last_type,
                            uint32_t &last_ts_utc, uint32_t &last_ts_ms, FaultType type) {
  switch (type) {
    case FaultType::MeterRead:
      counters.meter_read_fail++;
      break;
    case FaultType::Decode:
      counters.decode_fail++;
      break;
    case FaultType::LoraTx:
      counters.lora_tx_fail++;
      break;
    default:
      // No dedicated counter for this fault type.
      break;
  }
  last_type = type;
  // Read the clock once: two separate millis() calls could straddle a second
  // boundary and leave the two timestamps describing different instants.
  const unsigned long now_ms = millis();
  last_ts_utc = now_ms / 1000;
  last_ts_ms = now_ms;
}
/** Puts the file-scope fault state back to its initial, fault-free values. */
static void reset_test_faults() {
  // Last-fault record first, then the counters; the assignments are independent.
  test_last_error_ms = 0;
  test_last_error_utc = 0;
  test_last_error = FaultType::None;
  test_faults = {};
}
// ---------- Simulate the FIXED sampling loop logic ----------
/** Length of one sampling tick in milliseconds (1 s). */
static constexpr uint32_t SAMPLE_INTERVAL_MS{1000};
/**
* Simulates the fixed sender_loop sampling section.
*
* @param last_sample_ms Tracks the last sample tick (in/out).
* @param now_ms Current millis().
* @param meter_ok Whether the meter snapshot is fresh.
* @param time_jump_pending Whether a time-jump event is pending (in/out).
* @param faults Fault counters (in/out).
* @return Number of samples generated in the catch-up loop.
*/
static uint32_t simulate_fixed_sampling(
uint32_t &last_sample_ms, uint32_t now_ms, bool meter_ok,
bool &time_jump_pending, FaultCounters &faults) {
FaultType last_error = FaultType::None;
uint32_t last_error_utc = 0;
uint32_t last_error_ms = 0;
bool meter_fault_noted = false;
// Time-jump: one fault per event, outside loop.
if (time_jump_pending) {
time_jump_pending = false;
note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
meter_fault_noted = true;
}
// Stale meter: one fault per contiguous stale period, outside loop.
if (!meter_ok && !meter_fault_noted) {
note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
}
uint32_t samples = 0;
while (now_ms - last_sample_ms >= SAMPLE_INTERVAL_MS) {
last_sample_ms += SAMPLE_INTERVAL_MS;
samples++;
}
return samples;
}
/**
* Simulates the OLD (buggy) sampling loop for comparison.
*/
static uint32_t simulate_buggy_sampling(
uint32_t &last_sample_ms, uint32_t now_ms, bool meter_ok,
bool &time_jump_pending, FaultCounters &faults) {
FaultType last_error = FaultType::None;
uint32_t last_error_utc = 0;
uint32_t last_error_ms = 0;
uint32_t samples = 0;
while (now_ms - last_sample_ms >= SAMPLE_INTERVAL_MS) {
last_sample_ms += SAMPLE_INTERVAL_MS;
samples++;
if (!meter_ok) {
note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
}
if (time_jump_pending) {
time_jump_pending = false;
note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
}
}
return samples;
}
// ---------- Tests ----------
/**
* Normal operation: meter is fresh, no blocking. 1 tick per call.
* Should produce 0 faults.
*/
static void test_no_fault_when_meter_fresh() {
FaultCounters faults = {};
uint32_t last_sample_ms = 0;
bool time_jump = false;
// Simulate 60 consecutive 1s ticks with fresh meter data.
for (int i = 1; i <= 60; i++) {
simulate_fixed_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
}
TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
}
/**
* LoRa TX blocks for 10 seconds while meter is stale.
* OLD code: 10 faults. FIXED code: 1 fault.
*/
static void test_single_fault_after_blocking_stale() {
FaultCounters faults = {};
uint32_t last_sample_ms = 0;
bool time_jump = false;
// 5 normal ticks with fresh data.
for (int i = 1; i <= 5; i++) {
simulate_fixed_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
}
TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
// LoRa TX blocks for 10s → meter goes stale.
// now_ms = 15000, last_sample_ms = 5000 → 10 catch-up ticks.
uint32_t samples = simulate_fixed_sampling(last_sample_ms, 15000, false, time_jump, faults);
TEST_ASSERT_EQUAL_UINT32(10, samples); // 10 ticks caught up.
TEST_ASSERT_EQUAL_UINT32(1, faults.meter_read_fail); // But only 1 fault!
}
/**
* Demonstrate the OLD buggy behavior: same scenario produces 10 faults.
*/
static void test_buggy_produces_many_faults() {
FaultCounters faults = {};
uint32_t last_sample_ms = 0;
bool time_jump = false;
for (int i = 1; i <= 5; i++) {
simulate_buggy_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
}
TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
simulate_buggy_sampling(last_sample_ms, 15000, false, time_jump, faults);
TEST_ASSERT_EQUAL_UINT32(10, faults.meter_read_fail); // Buggy: 10 faults for one event.
}
/**
* Time-jump event should produce exactly 1 additional fault,
* regardless of how many ticks are caught up.
*/
static void test_time_jump_single_fault() {
FaultCounters faults = {};
uint32_t last_sample_ms = 0;
bool time_jump = true; // Pending time-jump.
// 8 catch-up ticks with stale meter AND time jump pending.
uint32_t samples = simulate_fixed_sampling(last_sample_ms, 8000, false, time_jump, faults);
TEST_ASSERT_EQUAL_UINT32(8, samples);
// Time jump counted as 1, stale suppressed because meter_fault_noted == true.
TEST_ASSERT_EQUAL_UINT32(1, faults.meter_read_fail);
TEST_ASSERT_FALSE(time_jump);
}
/**
* Repeated stale periods should count 1 fault per call to the sampling function,
* not 1 per tick. After 3600s at 1 call/s with meter stale every call,
* the FIXED code should produce ≤ 3600 faults (1 per call).
* The OLD code would produce the same number (since 1 tick per call).
* The difference is when blocking causes N>1 ticks per call.
*/
static void test_sustained_stale_1hz_no_blocking() {
FaultCounters faults = {};
uint32_t last_sample_ms = 0;
bool time_jump = false;
// Simulate 1 hour at 1 Hz with meter always stale (no blocking, 1 tick/call).
for (uint32_t i = 1; i <= 3600; i++) {
simulate_fixed_sampling(last_sample_ms, i * 1000, false, time_jump, faults);
}
// 1 fault per call = 3600 faults. This correctly reflects 3600 distinct evaluations
// where the meter was stale.
TEST_ASSERT_EQUAL_UINT32(3600, faults.meter_read_fail);
}
/**
* Worst-case: 1 hour, main loop blocked for 10s every 30s (batch TX + ACK).
* Each blocking event catches up 10 ticks with stale meter.
*
* OLD: 10 faults per blocking event × 120 blocks = 1200 faults,
* + 20 normal stale ticks between blocks × 120 = 2400 → total ~3600.
*
* FIXED: 1 fault per blocking event + 1 per non-blocked stale call.
* 120 blocking events + 2400 normal calls = 2520.
* (Still correctly counts each loop iteration where meter was stale.)
*/
static void test_periodic_blocking_reduces_faults() {
FaultCounters faults_fixed = {};
FaultCounters faults_buggy = {};
uint32_t last_fixed = 0;
uint32_t last_buggy = 0;
bool tj_fixed = false;
bool tj_buggy = false;
uint32_t t = 0;
for (int cycle = 0; cycle < 120; cycle++) {
// 20s of normal 1Hz polling, meter stale.
for (int s = 0; s < 20; s++) {
t += 1000;
simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
}
// 10s blocking (LoRa TX + ACK), meter stale.
t += 10000;
simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
}
// Both produce 3600 samples total.
// Buggy: 20*120 normal + 10*120 from catch-up = 3600 faults.
TEST_ASSERT_EQUAL_UINT32(3600, faults_buggy.meter_read_fail);
// Fixed: 20*120 normal + 1*120 from catch-up = 2520 faults.
TEST_ASSERT_EQUAL_UINT32(2520, faults_fixed.meter_read_fail);
// Significant reduction: fixed < buggy.
TEST_ASSERT_TRUE(faults_fixed.meter_read_fail < faults_buggy.meter_read_fail);
}
/**
* Real scenario: meter works fine most of the time; occasional 5-10s stale
* during LoRa TX. With fresh meter otherwise, faults should be minimal.
*
* 1h = 120 batch cycles of 30s.
* Each cycle: 20s meter OK → 10s TX blocking (stale) → continue.
* FIXED: 120 faults/h (one per TX stale event).
* OLD: ~1200 faults/h (10 per TX stale event).
*/
static void test_realistic_scenario_mostly_fresh() {
FaultCounters faults_fixed = {};
FaultCounters faults_buggy = {};
uint32_t last_fixed = 0;
uint32_t last_buggy = 0;
bool tj_fixed = false;
bool tj_buggy = false;
uint32_t t = 0;
for (int cycle = 0; cycle < 120; cycle++) {
// 20s of fresh meter data.
for (int s = 0; s < 20; s++) {
t += 1000;
simulate_fixed_sampling(last_fixed, t, true, tj_fixed, faults_fixed);
simulate_buggy_sampling(last_buggy, t, true, tj_buggy, faults_buggy);
}
// 10s LoRa blocking, meter goes stale.
t += 10000;
simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
}
// Fixed: 0 faults during fresh + 1 per stale event = 120 faults/h.
TEST_ASSERT_EQUAL_UINT32(120, faults_fixed.meter_read_fail);
// Buggy: 0 faults during fresh + 10 per stale event = 1200 faults/h.
TEST_ASSERT_EQUAL_UINT32(1200, faults_buggy.meter_read_fail);
}
/**
 * Arduino-style setup(): runs every test case in this file once, in the order
 * listed, between UNITY_BEGIN() and UNITY_END().
 */
void setup() {
UNITY_BEGIN();
// Baseline and single-stall behavior of the fixed model.
RUN_TEST(test_no_fault_when_meter_fresh);
RUN_TEST(test_single_fault_after_blocking_stale);
// Reference run of the old, buggy model.
RUN_TEST(test_buggy_produces_many_faults);
RUN_TEST(test_time_jump_single_fault);
// Hour-long scenarios.
RUN_TEST(test_sustained_stale_1hz_no_blocking);
RUN_TEST(test_periodic_blocking_reduces_faults);
RUN_TEST(test_realistic_scenario_mostly_fresh);
UNITY_END();
}
// Intentionally empty: all tests are run once from setup().
void loop() {}