test(meter): add fault-count regression test for meter diagnosis

2026-03-16 16:32:21 +01:00
parent 3e9259735e
commit 99aae76404
1 changed files with 301 additions and 0 deletions
@@ -0,0 +1,301 @@
 /**
 * @file test_meter_fault_count.cpp
 * @brief Unit test: verifies that the meter fault counter increments once per
 *        stale-data event, NOT once per catch-up tick.
 *
 * Regression test for the ~200 errors/hour bug where LoRa TX blocking caused
 * the sampling catch-up loop to fire note_fault() for every missed 1s tick.
 *
 * Run on target with: pio test -e lilygo-t3-v1-6-1-test -f test_meter_fault_count
 */
 #include <Arduino.h>
 #include <unity.h>
 #include "data_model.h"
 // ---------- Minimal stubs replicating the fixed fault-counting logic ----------
 // File-level fault state, initially cleared; reset_test_faults() restores
 // these same values.
 static FaultCounters test_faults{};
 static FaultType test_last_error{FaultType::None};
 static uint32_t test_last_error_utc{0};
 static uint32_t test_last_error_ms{0};
 /**
 * Test double for the fault-recording routine.
 *
 * Increments the counter that matches @p type (MeterRead, Decode or LoraTx;
 * any other type leaves the counters untouched) and records the type and
 * time of the most recent fault.
 *
 * @param counters     Fault counters to update (in/out).
 * @param last_type    Receives @p type (out).
 * @param last_ts_utc  Receives the fault time in whole seconds (out). In this
 *                     stub the value is millis() / 1000, i.e. uptime seconds
 *                     rather than a real UTC timestamp.
 * @param last_ts_ms   Receives the millis() reading taken for this fault (out).
 * @param type         Fault being reported.
 */
 static void note_fault_stub(FaultCounters &counters, FaultType &last_type,
                             uint32_t &last_ts_utc, uint32_t &last_ts_ms, FaultType type) {
  switch (type) {
    case FaultType::MeterRead:
      counters.meter_read_fail++;
      break;
    case FaultType::Decode:
      counters.decode_fail++;
      break;
    case FaultType::LoraTx:
      counters.lora_tx_fail++;
      break;
    default:
      // Other fault types have no dedicated counter in this stub.
      break;
  }
  last_type = type;
  // Read the clock once so both timestamps describe the same instant; two
  // separate millis() calls could straddle a tick and disagree.
  const uint32_t now_ms = millis();
  last_ts_utc = now_ms / 1000;
  last_ts_ms = now_ms;
 }
 static void reset_test_faults() {
  test_faults = {};
  test_last_error = FaultType::None;
  test_last_error_utc = 0;
  test_last_error_ms = 0;
 }
 // ---------- Simulate the FIXED sampling loop logic ----------
 // Sampling tick period in milliseconds (1 s). Both simulated loops advance
 // last_sample_ms in steps of this size.
 static constexpr uint32_t SAMPLE_INTERVAL_MS = 1000;
 /**
 * Simulates the fixed sender_loop sampling section.
 *
 * @param last_sample_ms  Tracks the last sample tick (in/out).
 * @param now_ms          Current millis().
 * @param meter_ok        Whether the meter snapshot is fresh.
 * @param time_jump_pending  Whether a time-jump event is pending (in/out).
 * @param faults          Fault counters (in/out).
 * @return Number of samples generated in the catch-up loop.
 */
 static uint32_t simulate_fixed_sampling(
    uint32_t &last_sample_ms, uint32_t now_ms, bool meter_ok,
    bool &time_jump_pending, FaultCounters &faults) {
  FaultType last_error = FaultType::None;
  uint32_t last_error_utc = 0;
  uint32_t last_error_ms = 0;
  bool meter_fault_noted = false;
  // Time-jump: one fault per event, outside loop.
  if (time_jump_pending) {
    time_jump_pending = false;
    note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
    meter_fault_noted = true;
  }
  // Stale meter: one fault per contiguous stale period, outside loop.
  if (!meter_ok && !meter_fault_noted) {
    note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
  }
  uint32_t samples = 0;
  while (now_ms - last_sample_ms >= SAMPLE_INTERVAL_MS) {
    last_sample_ms += SAMPLE_INTERVAL_MS;
    samples++;
  }
  return samples;
 }
 /**
 * Simulates the OLD (buggy) sampling loop for comparison.
 */
 static uint32_t simulate_buggy_sampling(
    uint32_t &last_sample_ms, uint32_t now_ms, bool meter_ok,
    bool &time_jump_pending, FaultCounters &faults) {
  FaultType last_error = FaultType::None;
  uint32_t last_error_utc = 0;
  uint32_t last_error_ms = 0;
  uint32_t samples = 0;
  while (now_ms - last_sample_ms >= SAMPLE_INTERVAL_MS) {
    last_sample_ms += SAMPLE_INTERVAL_MS;
    samples++;
    if (!meter_ok) {
      note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
    }
    if (time_jump_pending) {
      time_jump_pending = false;
      note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
    }
  }
  return samples;
 }
 // ---------- Tests ----------
 /**
 * Normal operation: meter is fresh, no blocking. 1 tick per call.
 * Should produce 0 faults.
 */
 static void test_no_fault_when_meter_fresh() {
  FaultCounters faults = {};
  uint32_t last_sample_ms = 0;
  bool time_jump = false;
  // Simulate 60 consecutive 1s ticks with fresh meter data.
  for (int i = 1; i <= 60; i++) {
    simulate_fixed_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
  }
  TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
 }
 /**
 * LoRa TX blocks for 10 seconds while meter is stale.
 * OLD code: 10 faults. FIXED code: 1 fault.
 */
 static void test_single_fault_after_blocking_stale() {
  FaultCounters faults = {};
  uint32_t last_sample_ms = 0;
  bool time_jump = false;
  // 5 normal ticks with fresh data.
  for (int i = 1; i <= 5; i++) {
    simulate_fixed_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
  }
  TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
  // LoRa TX blocks for 10s → meter goes stale.
  // now_ms = 15000, last_sample_ms = 5000 → 10 catch-up ticks.
  uint32_t samples = simulate_fixed_sampling(last_sample_ms, 15000, false, time_jump, faults);
  TEST_ASSERT_EQUAL_UINT32(10, samples);   // 10 ticks caught up.
  TEST_ASSERT_EQUAL_UINT32(1, faults.meter_read_fail);  // But only 1 fault!
 }
 /**
 * Demonstrate the OLD buggy behavior: same scenario produces 10 faults.
 */
 static void test_buggy_produces_many_faults() {
  FaultCounters faults = {};
  uint32_t last_sample_ms = 0;
  bool time_jump = false;
  for (int i = 1; i <= 5; i++) {
    simulate_buggy_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
  }
  TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
  simulate_buggy_sampling(last_sample_ms, 15000, false, time_jump, faults);
  TEST_ASSERT_EQUAL_UINT32(10, faults.meter_read_fail);  // Buggy: 10 faults for one event.
 }
 /**
 * Time-jump event should produce exactly 1 additional fault,
 * regardless of how many ticks are caught up.
 */
 static void test_time_jump_single_fault() {
  FaultCounters faults = {};
  uint32_t last_sample_ms = 0;
  bool time_jump = true;  // Pending time-jump.
  // 8 catch-up ticks with stale meter AND time jump pending.
  uint32_t samples = simulate_fixed_sampling(last_sample_ms, 8000, false, time_jump, faults);
  TEST_ASSERT_EQUAL_UINT32(8, samples);
  // Time jump counted as 1, stale suppressed because meter_fault_noted == true.
  TEST_ASSERT_EQUAL_UINT32(1, faults.meter_read_fail);
  TEST_ASSERT_FALSE(time_jump);
 }
 /**
 * Repeated stale periods should count 1 fault per call to the sampling function,
 * not 1 per tick. After 3600s at 1 call/s with meter stale every call,
 * the FIXED code should produce ≤ 3600 faults (1 per call).
 * The OLD code would produce the same number (since 1 tick per call).
 * The difference is when blocking causes N>1 ticks per call.
 */
 static void test_sustained_stale_1hz_no_blocking() {
  FaultCounters faults = {};
  uint32_t last_sample_ms = 0;
  bool time_jump = false;
  // Simulate 1 hour at 1 Hz with meter always stale (no blocking, 1 tick/call).
  for (uint32_t i = 1; i <= 3600; i++) {
    simulate_fixed_sampling(last_sample_ms, i * 1000, false, time_jump, faults);
  }
  // 1 fault per call = 3600 faults. This correctly reflects 3600 distinct evaluations
  // where the meter was stale.
  TEST_ASSERT_EQUAL_UINT32(3600, faults.meter_read_fail);
 }
 /**
 * Worst-case: 1 hour, main loop blocked for 10s every 30s (batch TX + ACK).
 * Each blocking event catches up 10 ticks with stale meter.
 *
 * OLD: 10 faults per blocking event × 120 blocks = 1200 faults,
 *      + 20 normal stale ticks between blocks × 120 = 2400 → total ~3600.
 *
 * FIXED: 1 fault per blocking event + 1 per non-blocked stale call.
 *        120 blocking events + 2400 normal calls = 2520.
 *        (Still correctly counts each loop iteration where meter was stale.)
 */
 static void test_periodic_blocking_reduces_faults() {
  FaultCounters faults_fixed = {};
  FaultCounters faults_buggy = {};
  uint32_t last_fixed = 0;
  uint32_t last_buggy = 0;
  bool tj_fixed = false;
  bool tj_buggy = false;
  uint32_t t = 0;
  for (int cycle = 0; cycle < 120; cycle++) {
    // 20s of normal 1Hz polling, meter stale.
    for (int s = 0; s < 20; s++) {
      t += 1000;
      simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
      simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
    }
    // 10s blocking (LoRa TX + ACK), meter stale.
    t += 10000;
    simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
    simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
  }
  // Both produce 3600 samples total.
  // Buggy: 20*120 normal + 10*120 from catch-up = 3600 faults.
  TEST_ASSERT_EQUAL_UINT32(3600, faults_buggy.meter_read_fail);
  // Fixed: 20*120 normal + 1*120 from catch-up = 2520 faults.
  TEST_ASSERT_EQUAL_UINT32(2520, faults_fixed.meter_read_fail);
  // Significant reduction: fixed < buggy.
  TEST_ASSERT_TRUE(faults_fixed.meter_read_fail < faults_buggy.meter_read_fail);
 }
 /**
 * Real scenario: meter works fine most of the time; occasional 5-10s stale
 * during LoRa TX. With fresh meter otherwise, faults should be minimal.
 *
 * 1h = 120 batch cycles of 30s.
 * Each cycle: 20s meter OK → 10s TX blocking (stale) → continue.
 * FIXED: 120 faults/h (one per TX stale event).
 * OLD: ~1200 faults/h (10 per TX stale event).
 */
 static void test_realistic_scenario_mostly_fresh() {
  FaultCounters faults_fixed = {};
  FaultCounters faults_buggy = {};
  uint32_t last_fixed = 0;
  uint32_t last_buggy = 0;
  bool tj_fixed = false;
  bool tj_buggy = false;
  uint32_t t = 0;
  for (int cycle = 0; cycle < 120; cycle++) {
    // 20s of fresh meter data.
    for (int s = 0; s < 20; s++) {
      t += 1000;
      simulate_fixed_sampling(last_fixed, t, true, tj_fixed, faults_fixed);
      simulate_buggy_sampling(last_buggy, t, true, tj_buggy, faults_buggy);
    }
    // 10s LoRa blocking, meter goes stale.
    t += 10000;
    simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
    simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
  }
  // Fixed: 0 faults during fresh + 1 per stale event = 120 faults/h.
  TEST_ASSERT_EQUAL_UINT32(120, faults_fixed.meter_read_fail);
  // Buggy: 0 faults during fresh + 10 per stale event = 1200 faults/h.
  TEST_ASSERT_EQUAL_UINT32(1200, faults_buggy.meter_read_fail);
 }
 /**
 * Arduino entry point: runs each test case once, in the order listed,
 * between UNITY_BEGIN() and UNITY_END().
 */
 void setup() {
  UNITY_BEGIN();
  // Fixed-logic baseline and single-stall behavior.
  RUN_TEST(test_no_fault_when_meter_fresh);
  RUN_TEST(test_single_fault_after_blocking_stale);
  // Reference check of the old, buggy behavior.
  RUN_TEST(test_buggy_produces_many_faults);
  RUN_TEST(test_time_jump_single_fault);
  // Hour-long scenarios.
  RUN_TEST(test_sustained_stale_1hz_no_blocking);
  RUN_TEST(test_periodic_blocking_reduces_faults);
  RUN_TEST(test_realistic_scenario_mostly_fresh);
  UNITY_END();
 }
 // Intentionally empty: every test is run once from setup().
 void loop() {}