test(meter): add fault-count regression test for meter diagnosis
This commit is contained in:
301
test/test_meter_fault_count/test_meter_fault_count.cpp
Normal file
301
test/test_meter_fault_count/test_meter_fault_count.cpp
Normal file
@@ -0,0 +1,301 @@
|
|||||||
|
/**
 * @file test_meter_fault_count.cpp
 * @brief Unit test: verifies that the meter fault counter increments once per
 *        stale-data event, NOT once per catch-up tick.
 *
 * Regression test for the ~200 errors/hour bug where LoRa TX blocking caused
 * the sampling catch-up loop to fire note_fault() for every missed 1s tick.
 *
 * Run on target with: pio test -e lilygo-t3-v1-6-1-test -f test_meter_fault_count
 */
|
||||||
|
|
||||||
|
#include <Arduino.h>
|
||||||
|
#include <unity.h>
|
||||||
|
|
||||||
|
#include "data_model.h"
|
||||||
|
|
||||||
|
// ---------- Minimal stubs replicating the fixed fault-counting logic ----------
|
||||||
|
|
||||||
|
static FaultCounters test_faults = {};
|
||||||
|
static FaultType test_last_error = FaultType::None;
|
||||||
|
static uint32_t test_last_error_utc = 0;
|
||||||
|
static uint32_t test_last_error_ms = 0;
|
||||||
|
|
||||||
|
/**
 * Test double for the fault bookkeeping exercised by these tests.
 *
 * Increments the counter that matches @p type and records @p type, together
 * with the time of the call, as the most recent fault.
 *
 * @param counters     Fault counters to update (in/out).
 * @param last_type    Receives @p type (out).
 * @param last_ts_utc  Receives millis() / 1000, i.e. uptime in whole seconds.
 *                     The stub has no wall clock, so despite the name this is
 *                     not a real UTC timestamp (out).
 * @param last_ts_ms   Receives millis() at the time of the call (out).
 * @param type         Fault category being reported. Categories other than
 *                     MeterRead, Decode and LoraTx bump no counter but are
 *                     still recorded as the last fault.
 */
static void note_fault_stub(FaultCounters &counters, FaultType &last_type,
                            uint32_t &last_ts_utc, uint32_t &last_ts_ms, FaultType type) {
  // Read the clock exactly once so both timestamps describe the same instant;
  // two separate millis() reads can straddle a millisecond/second boundary.
  const uint32_t now_ms = millis();

  switch (type) {
    case FaultType::MeterRead:
      counters.meter_read_fail++;
      break;
    case FaultType::Decode:
      counters.decode_fail++;
      break;
    case FaultType::LoraTx:
      counters.lora_tx_fail++;
      break;
    default:
      // No dedicated counter for this category.
      break;
  }

  last_type = type;
  last_ts_utc = now_ms / 1000;
  last_ts_ms = now_ms;
}
|
||||||
|
|
||||||
|
static void reset_test_faults() {
|
||||||
|
test_faults = {};
|
||||||
|
test_last_error = FaultType::None;
|
||||||
|
test_last_error_utc = 0;
|
||||||
|
test_last_error_ms = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------- Simulate the FIXED sampling loop logic ----------
|
||||||
|
|
||||||
|
// Length of one sampling tick in milliseconds; both simulations advance
// last_sample_ms in steps of this size.
static constexpr uint32_t SAMPLE_INTERVAL_MS = 1000U;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simulates the fixed sender_loop sampling section.
|
||||||
|
*
|
||||||
|
* @param last_sample_ms Tracks the last sample tick (in/out).
|
||||||
|
* @param now_ms Current millis().
|
||||||
|
* @param meter_ok Whether the meter snapshot is fresh.
|
||||||
|
* @param time_jump_pending Whether a time-jump event is pending (in/out).
|
||||||
|
* @param faults Fault counters (in/out).
|
||||||
|
* @return Number of samples generated in the catch-up loop.
|
||||||
|
*/
|
||||||
|
static uint32_t simulate_fixed_sampling(
|
||||||
|
uint32_t &last_sample_ms, uint32_t now_ms, bool meter_ok,
|
||||||
|
bool &time_jump_pending, FaultCounters &faults) {
|
||||||
|
|
||||||
|
FaultType last_error = FaultType::None;
|
||||||
|
uint32_t last_error_utc = 0;
|
||||||
|
uint32_t last_error_ms = 0;
|
||||||
|
bool meter_fault_noted = false;
|
||||||
|
|
||||||
|
// Time-jump: one fault per event, outside loop.
|
||||||
|
if (time_jump_pending) {
|
||||||
|
time_jump_pending = false;
|
||||||
|
note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
|
||||||
|
meter_fault_noted = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stale meter: one fault per contiguous stale period, outside loop.
|
||||||
|
if (!meter_ok && !meter_fault_noted) {
|
||||||
|
note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t samples = 0;
|
||||||
|
while (now_ms - last_sample_ms >= SAMPLE_INTERVAL_MS) {
|
||||||
|
last_sample_ms += SAMPLE_INTERVAL_MS;
|
||||||
|
samples++;
|
||||||
|
}
|
||||||
|
return samples;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simulates the OLD (buggy) sampling loop for comparison.
|
||||||
|
*/
|
||||||
|
static uint32_t simulate_buggy_sampling(
|
||||||
|
uint32_t &last_sample_ms, uint32_t now_ms, bool meter_ok,
|
||||||
|
bool &time_jump_pending, FaultCounters &faults) {
|
||||||
|
|
||||||
|
FaultType last_error = FaultType::None;
|
||||||
|
uint32_t last_error_utc = 0;
|
||||||
|
uint32_t last_error_ms = 0;
|
||||||
|
|
||||||
|
uint32_t samples = 0;
|
||||||
|
while (now_ms - last_sample_ms >= SAMPLE_INTERVAL_MS) {
|
||||||
|
last_sample_ms += SAMPLE_INTERVAL_MS;
|
||||||
|
samples++;
|
||||||
|
if (!meter_ok) {
|
||||||
|
note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
|
||||||
|
}
|
||||||
|
if (time_jump_pending) {
|
||||||
|
time_jump_pending = false;
|
||||||
|
note_fault_stub(faults, last_error, last_error_utc, last_error_ms, FaultType::MeterRead);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return samples;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------- Tests ----------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normal operation: meter is fresh, no blocking. 1 tick per call.
|
||||||
|
* Should produce 0 faults.
|
||||||
|
*/
|
||||||
|
static void test_no_fault_when_meter_fresh() {
|
||||||
|
FaultCounters faults = {};
|
||||||
|
uint32_t last_sample_ms = 0;
|
||||||
|
bool time_jump = false;
|
||||||
|
|
||||||
|
// Simulate 60 consecutive 1s ticks with fresh meter data.
|
||||||
|
for (int i = 1; i <= 60; i++) {
|
||||||
|
simulate_fixed_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* LoRa TX blocks for 10 seconds while meter is stale.
|
||||||
|
* OLD code: 10 faults. FIXED code: 1 fault.
|
||||||
|
*/
|
||||||
|
static void test_single_fault_after_blocking_stale() {
|
||||||
|
FaultCounters faults = {};
|
||||||
|
uint32_t last_sample_ms = 0;
|
||||||
|
bool time_jump = false;
|
||||||
|
|
||||||
|
// 5 normal ticks with fresh data.
|
||||||
|
for (int i = 1; i <= 5; i++) {
|
||||||
|
simulate_fixed_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
|
||||||
|
}
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
|
||||||
|
|
||||||
|
// LoRa TX blocks for 10s → meter goes stale.
|
||||||
|
// now_ms = 15000, last_sample_ms = 5000 → 10 catch-up ticks.
|
||||||
|
uint32_t samples = simulate_fixed_sampling(last_sample_ms, 15000, false, time_jump, faults);
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(10, samples); // 10 ticks caught up.
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(1, faults.meter_read_fail); // But only 1 fault!
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Demonstrate the OLD buggy behavior: same scenario produces 10 faults.
|
||||||
|
*/
|
||||||
|
static void test_buggy_produces_many_faults() {
|
||||||
|
FaultCounters faults = {};
|
||||||
|
uint32_t last_sample_ms = 0;
|
||||||
|
bool time_jump = false;
|
||||||
|
|
||||||
|
for (int i = 1; i <= 5; i++) {
|
||||||
|
simulate_buggy_sampling(last_sample_ms, i * 1000, true, time_jump, faults);
|
||||||
|
}
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(0, faults.meter_read_fail);
|
||||||
|
|
||||||
|
simulate_buggy_sampling(last_sample_ms, 15000, false, time_jump, faults);
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(10, faults.meter_read_fail); // Buggy: 10 faults for one event.
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Time-jump event should produce exactly 1 additional fault,
|
||||||
|
* regardless of how many ticks are caught up.
|
||||||
|
*/
|
||||||
|
static void test_time_jump_single_fault() {
|
||||||
|
FaultCounters faults = {};
|
||||||
|
uint32_t last_sample_ms = 0;
|
||||||
|
bool time_jump = true; // Pending time-jump.
|
||||||
|
|
||||||
|
// 8 catch-up ticks with stale meter AND time jump pending.
|
||||||
|
uint32_t samples = simulate_fixed_sampling(last_sample_ms, 8000, false, time_jump, faults);
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(8, samples);
|
||||||
|
// Time jump counted as 1, stale suppressed because meter_fault_noted == true.
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(1, faults.meter_read_fail);
|
||||||
|
TEST_ASSERT_FALSE(time_jump);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Repeated stale periods should count 1 fault per call to the sampling function,
|
||||||
|
* not 1 per tick. After 3600s at 1 call/s with meter stale every call,
|
||||||
|
* the FIXED code should produce ≤ 3600 faults (1 per call).
|
||||||
|
* The OLD code would produce the same number (since 1 tick per call).
|
||||||
|
* The difference is when blocking causes N>1 ticks per call.
|
||||||
|
*/
|
||||||
|
static void test_sustained_stale_1hz_no_blocking() {
|
||||||
|
FaultCounters faults = {};
|
||||||
|
uint32_t last_sample_ms = 0;
|
||||||
|
bool time_jump = false;
|
||||||
|
|
||||||
|
// Simulate 1 hour at 1 Hz with meter always stale (no blocking, 1 tick/call).
|
||||||
|
for (uint32_t i = 1; i <= 3600; i++) {
|
||||||
|
simulate_fixed_sampling(last_sample_ms, i * 1000, false, time_jump, faults);
|
||||||
|
}
|
||||||
|
// 1 fault per call = 3600 faults. This correctly reflects 3600 distinct evaluations
|
||||||
|
// where the meter was stale.
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(3600, faults.meter_read_fail);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Worst-case: 1 hour, main loop blocked for 10s every 30s (batch TX + ACK).
|
||||||
|
* Each blocking event catches up 10 ticks with stale meter.
|
||||||
|
*
|
||||||
|
* OLD: 10 faults per blocking event × 120 blocks = 1200 faults,
|
||||||
|
* + 20 normal stale ticks between blocks × 120 = 2400 → total ~3600.
|
||||||
|
*
|
||||||
|
* FIXED: 1 fault per blocking event + 1 per non-blocked stale call.
|
||||||
|
* 120 blocking events + 2400 normal calls = 2520.
|
||||||
|
* (Still correctly counts each loop iteration where meter was stale.)
|
||||||
|
*/
|
||||||
|
static void test_periodic_blocking_reduces_faults() {
|
||||||
|
FaultCounters faults_fixed = {};
|
||||||
|
FaultCounters faults_buggy = {};
|
||||||
|
uint32_t last_fixed = 0;
|
||||||
|
uint32_t last_buggy = 0;
|
||||||
|
bool tj_fixed = false;
|
||||||
|
bool tj_buggy = false;
|
||||||
|
|
||||||
|
uint32_t t = 0;
|
||||||
|
for (int cycle = 0; cycle < 120; cycle++) {
|
||||||
|
// 20s of normal 1Hz polling, meter stale.
|
||||||
|
for (int s = 0; s < 20; s++) {
|
||||||
|
t += 1000;
|
||||||
|
simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
|
||||||
|
simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
|
||||||
|
}
|
||||||
|
// 10s blocking (LoRa TX + ACK), meter stale.
|
||||||
|
t += 10000;
|
||||||
|
simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
|
||||||
|
simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Both produce 3600 samples total.
|
||||||
|
// Buggy: 20*120 normal + 10*120 from catch-up = 3600 faults.
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(3600, faults_buggy.meter_read_fail);
|
||||||
|
// Fixed: 20*120 normal + 1*120 from catch-up = 2520 faults.
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(2520, faults_fixed.meter_read_fail);
|
||||||
|
// Significant reduction: fixed < buggy.
|
||||||
|
TEST_ASSERT_TRUE(faults_fixed.meter_read_fail < faults_buggy.meter_read_fail);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Real scenario: meter works fine most of the time; occasional 5-10s stale
|
||||||
|
* during LoRa TX. With fresh meter otherwise, faults should be minimal.
|
||||||
|
*
|
||||||
|
* 1h = 120 batch cycles of 30s.
|
||||||
|
* Each cycle: 20s meter OK → 10s TX blocking (stale) → continue.
|
||||||
|
* FIXED: 120 faults/h (one per TX stale event).
|
||||||
|
* OLD: ~1200 faults/h (10 per TX stale event).
|
||||||
|
*/
|
||||||
|
static void test_realistic_scenario_mostly_fresh() {
|
||||||
|
FaultCounters faults_fixed = {};
|
||||||
|
FaultCounters faults_buggy = {};
|
||||||
|
uint32_t last_fixed = 0;
|
||||||
|
uint32_t last_buggy = 0;
|
||||||
|
bool tj_fixed = false;
|
||||||
|
bool tj_buggy = false;
|
||||||
|
|
||||||
|
uint32_t t = 0;
|
||||||
|
for (int cycle = 0; cycle < 120; cycle++) {
|
||||||
|
// 20s of fresh meter data.
|
||||||
|
for (int s = 0; s < 20; s++) {
|
||||||
|
t += 1000;
|
||||||
|
simulate_fixed_sampling(last_fixed, t, true, tj_fixed, faults_fixed);
|
||||||
|
simulate_buggy_sampling(last_buggy, t, true, tj_buggy, faults_buggy);
|
||||||
|
}
|
||||||
|
// 10s LoRa blocking, meter goes stale.
|
||||||
|
t += 10000;
|
||||||
|
simulate_fixed_sampling(last_fixed, t, false, tj_fixed, faults_fixed);
|
||||||
|
simulate_buggy_sampling(last_buggy, t, false, tj_buggy, faults_buggy);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fixed: 0 faults during fresh + 1 per stale event = 120 faults/h.
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(120, faults_fixed.meter_read_fail);
|
||||||
|
// Buggy: 0 faults during fresh + 10 per stale event = 1200 faults/h.
|
||||||
|
TEST_ASSERT_EQUAL_UINT32(1200, faults_buggy.meter_read_fail);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Arduino entry point: runs the whole Unity suite once.
 */
void setup() {
  UNITY_BEGIN();

  // Single-scenario checks of the fixed and the old behaviour.
  RUN_TEST(test_no_fault_when_meter_fresh);
  RUN_TEST(test_single_fault_after_blocking_stale);
  RUN_TEST(test_buggy_produces_many_faults);
  RUN_TEST(test_time_jump_single_fault);

  // Hour-long simulations.
  RUN_TEST(test_sustained_stale_1hz_no_blocking);
  RUN_TEST(test_periodic_blocking_reduces_faults);
  RUN_TEST(test_realistic_scenario_mostly_fresh);

  UNITY_END();
}
|
||||||
|
|
||||||
|
/** Arduino main loop; intentionally empty because all work happens in setup(). */
void loop() {
}
|
||||||
Reference in New Issue
Block a user