From 99aae764040bdf79c573e92f1b341f21d2dac4ef Mon Sep 17 00:00:00 2001
From: acidburns
Date: Mon, 16 Mar 2026 16:32:21 +0100
Subject: [PATCH] test(meter): add fault-count regression test for meter diagnosis

---
 .../test_meter_fault_count.cpp                | 340 ++++++++++++++++++
 1 file changed, 340 insertions(+)
 create mode 100644 test/test_meter_fault_count/test_meter_fault_count.cpp

diff --git a/test/test_meter_fault_count/test_meter_fault_count.cpp b/test/test_meter_fault_count/test_meter_fault_count.cpp
new file mode 100644
index 0000000..c4e3c12
--- /dev/null
+++ b/test/test_meter_fault_count/test_meter_fault_count.cpp
@@ -0,0 +1,340 @@
+/**
+ * @file test_meter_fault_count.cpp
+ * @brief Unit test: verifies that the meter fault counter increments once per
+ *        stale-data event, NOT once per catch-up tick.
+ *
+ * Regression test for the ~200 errors/hour bug where LoRa TX blocking caused
+ * the sampling catch-up loop to fire note_fault() for every missed 1s tick.
+ *
+ * Run on target with: pio test -e lilygo-t3-v1-6-1-test -f test_meter_fault_count
+ */
+
+#include <Arduino.h>
+#include <unity.h>
+
+#include "data_model.h"
+
+// ---------- Minimal stubs replicating the fixed fault-counting logic ----------
+
+// File-scope fault state. The tests below keep their own local FaultCounters,
+// so within this file only reset_test_faults() touches these.
+static FaultCounters test_faults = {};
+static FaultType test_last_error = FaultType::None;
+static uint32_t test_last_error_utc = 0;
+static uint32_t test_last_error_ms = 0;
+
+/**
+ * Records one fault: bumps the counter matching @p type and stores the type
+ * together with the time of the event.
+ *
+ * @param counters    Fault counters to update (in/out).
+ * @param last_type   Receives @p type (out).
+ * @param last_ts_utc Receives millis() / 1000 (out); this stub derives it from
+ *                    uptime, not from a wall clock.
+ * @param last_ts_ms  Receives the millis() value of the event (out).
+ * @param type        Fault being recorded. Only MeterRead, Decode and LoraTx
+ *                    have a counter; any other type just updates the "last" fields.
+ */
+static void note_fault_stub(FaultCounters &counters, FaultType &last_type,
+                            uint32_t &last_ts_utc, uint32_t &last_ts_ms, FaultType type) {
+  if (type == FaultType::MeterRead) {
+    counters.meter_read_fail++;
+  } else if (type == FaultType::Decode) {
+    counters.decode_fail++;
+  } else if (type == FaultType::LoraTx) {
+    counters.lora_tx_fail++;
+  }
+  last_type = type;
+  // Read the clock once so both timestamps describe the same instant.
+  const uint32_t now_ms = millis();
+  last_ts_utc = now_ms / 1000;
+  last_ts_ms = now_ms;
+}
+
+/** Restores the file-scope fault state to its initial (no fault) values. */
+static void reset_test_faults() {
+  test_faults = {};
+  test_last_error = FaultType::None;
+  test_last_error_utc = 0;
+  test_last_error_ms = 0;
+}
+
+// ---------- Simulate the FIXED sampling loop logic ----------
+
+static constexpr uint32_t SAMPLE_INTERVAL_MS = 1000;
+
+/**
+ * Simulates the fixed sender_loop sampling section.
+ *
+ * Faults are noted before the catch-up loop, so one call records at most one
+ * MeterRead fault no matter how many ticks it catches up.
+ *
+ * NOTE(review): a stale meter is counted on every call, even when no sample
+ * tick is due yet. If the real loop runs faster than SAMPLE_INTERVAL_MS,
+ * confirm that the production code gates the fault accordingly.
+ *
+ * @param last_sample_ms    Tracks the last sample tick (in/out).
+ * @param now_ms            Current millis().
+ * @param meter_ok          Whether the meter snapshot is fresh.
+ * @param time_jump_pending Whether a time-jump event is pending (in/out).
+ * @param faults            Fault counters (in/out).
+ * @return Number of samples generated in the catch-up loop.
+ */
+static uint32_t simulate_fixed_sampling(
+    uint32_t &last_sample_ms, uint32_t now_ms, bool meter_ok,
+    bool &time_jump_pending, FaultCounters &faults) {
+  FaultType last_error = FaultType::None;
+  uint32_t last_error_utc = 0;
+  uint32_t last_error_ms = 0;
+  bool meter_fault_noted = false;
+
+  // Time-jump: one fault per event, outside loop.
+  if (time_jump_pending) {
+    time_jump_pending = false;
+    note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
+    meter_fault_noted = true;
+  }
+
+  // Stale meter: one fault per call (not per tick), outside loop. Skipped when
+  // the time-jump above already noted a MeterRead fault in this call.
+  if (!meter_ok && !meter_fault_noted) {
+    note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
+  }
+
+  // Catch up every missed tick. The unsigned subtraction stays correct when
+  // millis() wraps around.
+  uint32_t samples = 0;
+  while (now_ms - last_sample_ms >= SAMPLE_INTERVAL_MS) {
+    last_sample_ms += SAMPLE_INTERVAL_MS;
+    samples++;
+  }
+  return samples;
+}
+
+/**
+ * Simulates the OLD (buggy) sampling loop for comparison.
+ *
+ * Faults are noted inside the catch-up loop, so a stale meter yields one
+ * MeterRead fault per caught-up tick.
+ *
+ * @param last_sample_ms    Tracks the last sample tick (in/out).
+ * @param now_ms            Current millis().
+ * @param meter_ok          Whether the meter snapshot is fresh.
+ * @param time_jump_pending Whether a time-jump event is pending (in/out).
+ * @param faults            Fault counters (in/out).
+ * @return Number of samples generated in the catch-up loop.
+ */
+static uint32_t simulate_buggy_sampling(
+    uint32_t &last_sample_ms, uint32_t now_ms, bool meter_ok,
+    bool &time_jump_pending, FaultCounters &faults) {
+  FaultType last_error = FaultType::None;
+  uint32_t last_error_utc = 0;
+  uint32_t last_error_ms = 0;
+
+  uint32_t samples = 0;
+  while (now_ms - last_sample_ms >= SAMPLE_INTERVAL_MS) {
+    last_sample_ms += SAMPLE_INTERVAL_MS;
+    samples++;
+    if (!meter_ok) {
+      note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
+    }
+    if (time_jump_pending) {
+      time_jump_pending = false;
+      note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
+    }
+  }
+  return samples;
+}
+
+// ---------- Tests ----------
+
+/**
+ * Normal operation: meter is fresh, no blocking. 1 tick per call.
+ * Should produce 0 faults.
+ */
+static void test_no_fault_when_meter_fresh() {
+  FaultCounters faults = {};
+  uint32_t last_sample_ms = 0;
+  bool time_jump = false;
+
+  // Simulate 60 consecutive 1s ticks with fresh meter data.
+  for (uint32_t i = 1; i <= 60; i++) {
+    simulate_fixed_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
+  }
+
+  TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
+}
+
+/**
+ * LoRa TX blocks for 10 seconds while meter is stale.
+ * OLD code: 10 faults. FIXED code: 1 fault.
+ */
+static void test_single_fault_after_blocking_stale() {
+  FaultCounters faults = {};
+  uint32_t last_sample_ms = 0;
+  bool time_jump = false;
+
+  // 5 normal ticks with fresh data.
+  for (uint32_t i = 1; i <= 5; i++) {
+    simulate_fixed_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
+  }
+  TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
+
+  // LoRa TX blocks for 10s -> meter goes stale.
+  // now_ms = 15000, last_sample_ms = 5000 -> 10 catch-up ticks.
+  const uint32_t samples =
+      simulate_fixed_sampling(last_sample_ms, 15000, false, time_jump, faults);
+  TEST_ASSERT_EQUAL_UINT32(10, samples);                // 10 ticks caught up.
+  TEST_ASSERT_EQUAL_UINT32(1, faults.meter_read_fail);  // But only 1 fault!
+}
+
+/**
+ * Demonstrate the OLD buggy behavior: same scenario produces 10 faults.
+ */
+static void test_buggy_produces_many_faults() {
+  FaultCounters faults = {};
+  uint32_t last_sample_ms = 0;
+  bool time_jump = false;
+
+  for (uint32_t i = 1; i <= 5; i++) {
+    simulate_buggy_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
+  }
+  TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
+
+  simulate_buggy_sampling(last_sample_ms, 15000, false, time_jump, faults);
+  TEST_ASSERT_EQUAL_UINT32(10, faults.meter_read_fail);  // Buggy: 10 faults for one event.
+}
+
+/**
+ * A pending time-jump together with a stale meter should produce exactly
+ * 1 fault in total, regardless of how many ticks are caught up.
+ */
+static void test_time_jump_single_fault() {
+  FaultCounters faults = {};
+  uint32_t last_sample_ms = 0;
+  bool time_jump = true;  // Pending time-jump.
+
+  // 8 catch-up ticks with stale meter AND time jump pending.
+  const uint32_t samples =
+      simulate_fixed_sampling(last_sample_ms, 8000, false, time_jump, faults);
+  TEST_ASSERT_EQUAL_UINT32(8, samples);
+  // Time jump counted as 1, stale suppressed because meter_fault_noted == true.
+  TEST_ASSERT_EQUAL_UINT32(1, faults.meter_read_fail);
+  TEST_ASSERT_FALSE(time_jump);
+}
+
+/**
+ * Sustained stale data without blocking: the FIXED code counts 1 fault per call
+ * to the sampling function, not 1 per tick. After 3600s at 1 call/s with the
+ * meter stale on every call it produces exactly 3600 faults.
+ * The OLD code would produce the same number (since 1 tick per call).
+ * The difference is when blocking causes N>1 ticks per call.
+ */
+static void test_sustained_stale_1hz_no_blocking() {
+  FaultCounters faults = {};
+  uint32_t last_sample_ms = 0;
+  bool time_jump = false;
+
+  // Simulate 1 hour at 1 Hz with meter always stale (no blocking, 1 tick/call).
+  for (uint32_t i = 1; i <= 3600; i++) {
+    simulate_fixed_sampling(last_sample_ms, i * 1000, false, time_jump, faults);
+  }
+  // 1 fault per call = 3600 faults. This correctly reflects 3600 distinct evaluations
+  // where the meter was stale.
+  TEST_ASSERT_EQUAL_UINT32(3600, faults.meter_read_fail);
+}
+
+/**
+ * Worst-case: 1 hour, main loop blocked for 10s every 30s (batch TX + ACK).
+ * Each blocking event catches up 10 ticks with stale meter.
+ *
+ * OLD:   10 faults per blocking event x 120 blocks = 1200 faults,
+ *        + 20 normal stale ticks between blocks x 120 = 2400 -> total 3600.
+ *
+ * FIXED: 1 fault per blocking event + 1 per non-blocked stale call.
+ *        120 blocking events + 2400 normal calls = 2520.
+ *        (Still correctly counts each loop iteration where meter was stale.)
+ */
+static void test_periodic_blocking_reduces_faults() {
+  FaultCounters faults_fixed = {};
+  FaultCounters faults_buggy = {};
+  uint32_t last_fixed = 0;
+  uint32_t last_buggy = 0;
+  bool tj_fixed = false;
+  bool tj_buggy = false;
+
+  uint32_t t = 0;
+  for (int cycle = 0; cycle < 120; cycle++) {
+    // 20s of normal 1Hz polling, meter stale.
+    for (int s = 0; s < 20; s++) {
+      t += 1000;
+      simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
+      simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
+    }
+    // 10s blocking (LoRa TX + ACK), meter stale.
+    t += 10000;
+    simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
+    simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
+  }
+
+  // Both produce 3600 samples total.
+  // Buggy: 20*120 normal + 10*120 from catch-up = 3600 faults.
+  TEST_ASSERT_EQUAL_UINT32(3600, faults_buggy.meter_read_fail);
+  // Fixed: 20*120 normal + 1*120 from catch-up = 2520 faults.
+  TEST_ASSERT_EQUAL_UINT32(2520, faults_fixed.meter_read_fail);
+  // Significant reduction: fixed < buggy.
+  TEST_ASSERT_TRUE(faults_fixed.meter_read_fail < faults_buggy.meter_read_fail);
+}
+
+/**
+ * Real scenario: meter works fine most of the time; occasional 5-10s stale
+ * during LoRa TX. With fresh meter otherwise, faults should be minimal.
+ *
+ * 1h = 120 batch cycles of 30s.
+ * Each cycle: 20s meter OK -> 10s TX blocking (stale) -> continue.
+ * FIXED: 120 faults/h (one per TX stale event).
+ * OLD:   1200 faults/h (10 per TX stale event).
+ */
+static void test_realistic_scenario_mostly_fresh() {
+  FaultCounters faults_fixed = {};
+  FaultCounters faults_buggy = {};
+  uint32_t last_fixed = 0;
+  uint32_t last_buggy = 0;
+  bool tj_fixed = false;
+  bool tj_buggy = false;
+
+  uint32_t t = 0;
+  for (int cycle = 0; cycle < 120; cycle++) {
+    // 20s of fresh meter data.
+    for (int s = 0; s < 20; s++) {
+      t += 1000;
+      simulate_fixed_sampling(last_fixed, t, true, tj_fixed, faults_fixed);
+      simulate_buggy_sampling(last_buggy, t, true, tj_buggy, faults_buggy);
+    }
+    // 10s LoRa blocking, meter goes stale.
+    t += 10000;
+    simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
+    simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
+  }
+
+  // Fixed: 0 faults during fresh + 1 per stale event = 120 faults/h.
+  TEST_ASSERT_EQUAL_UINT32(120, faults_fixed.meter_read_fail);
+  // Buggy: 0 faults during fresh + 10 per stale event = 1200 faults/h.
+  TEST_ASSERT_EQUAL_UINT32(1200, faults_buggy.meter_read_fail);
+}
+
+/** Arduino entry point: runs every test case once and reports through Unity. */
+void setup() {
+  UNITY_BEGIN();
+  RUN_TEST(test_no_fault_when_meter_fresh);
+  RUN_TEST(test_single_fault_after_blocking_stale);
+  RUN_TEST(test_buggy_produces_many_faults);
+  RUN_TEST(test_time_jump_single_fault);
+  RUN_TEST(test_sustained_stale_1hz_no_blocking);
+  RUN_TEST(test_periodic_blocking_reduces_faults);
+  RUN_TEST(test_realistic_scenario_mostly_fresh);
+  UNITY_END();
+}
+
+/** Nothing to do after setup(); the tests run once. */
+void loop() {}