Add sender-local serial diagnostics for pipeline health

This commit is contained in:
2026-02-17 01:12:31 +01:00
parent ea3e99f350
commit cc5881974c
3 changed files with 108 additions and 12 deletions

View File

@@ -3,7 +3,18 @@
#include <Arduino.h> #include <Arduino.h>
#include "data_model.h" #include "data_model.h"
struct MeterDriverStats {
uint32_t frames_ok;
uint32_t frames_parse_fail;
uint32_t rx_overflow;
uint32_t rx_timeout;
uint32_t bytes_rx;
uint32_t last_rx_ms;
uint32_t last_good_frame_ms;
};
void meter_init(); void meter_init();
bool meter_read(MeterData &data); bool meter_read(MeterData &data);
bool meter_poll_frame(const char *&frame, size_t &len); bool meter_poll_frame(const char *&frame, size_t &len);
bool meter_parse_frame(const char *frame, size_t len, MeterData &data); bool meter_parse_frame(const char *frame, size_t len, MeterData &data);
void meter_get_stats(MeterDriverStats &out);

View File

@@ -94,6 +94,12 @@ static uint32_t g_last_debug_log_ms = 0;
static uint32_t g_sender_rx_window_ms = 0; static uint32_t g_sender_rx_window_ms = 0;
static uint32_t g_sender_sleep_ms = 0; static uint32_t g_sender_sleep_ms = 0;
static uint32_t g_sender_power_log_ms = 0; static uint32_t g_sender_power_log_ms = 0;
static uint32_t g_meter_queue_high_water = 0;
static uint32_t g_meter_queue_drop_count = 0;
static uint32_t g_sender_ack_timeout_total = 0;
static uint32_t g_sender_ack_retry_total = 0;
static uint32_t g_sender_ack_rtt_last_ms = 0;
static uint32_t g_sender_ack_rtt_ewma_ms = 0;
static RxRejectReason g_sender_rx_reject_reason = RxRejectReason::None; static RxRejectReason g_sender_rx_reject_reason = RxRejectReason::None;
static uint32_t g_sender_rx_reject_log_ms = 0; static uint32_t g_sender_rx_reject_log_ms = 0;
static RxRejectReason g_receiver_rx_reject_reason = RxRejectReason::None; static RxRejectReason g_receiver_rx_reject_reason = RxRejectReason::None;
@@ -152,6 +158,66 @@ static void serial_debug_printf(const char *fmt, ...) {
Serial.println(buf); Serial.println(buf);
} }
#ifdef ARDUINO_ARCH_ESP32
static void update_meter_queue_high_water() {
if (!g_meter_sample_queue) {
return;
}
uint32_t depth = static_cast<uint32_t>(uxQueueMessagesWaiting(g_meter_sample_queue));
if (depth > g_meter_queue_high_water) {
g_meter_queue_high_water = depth;
}
}
#endif
static void sender_log_diagnostics(uint32_t now_ms) {
if (!SERIAL_DEBUG_MODE) {
return;
}
if (now_ms - g_last_debug_log_ms < 5000) {
return;
}
g_last_debug_log_ms = now_ms;
MeterDriverStats meter_stats = {};
meter_get_stats(meter_stats);
uint32_t queue_depth = 0;
#ifdef ARDUINO_ARCH_ESP32
if (g_meter_sample_queue) {
queue_depth = static_cast<uint32_t>(uxQueueMessagesWaiting(g_meter_sample_queue));
}
#endif
uint32_t meter_age_ms = 0;
if (meter_stats.last_good_frame_ms > 0 && now_ms >= meter_stats.last_good_frame_ms) {
meter_age_ms = now_ms - meter_stats.last_good_frame_ms;
} else if (meter_stats.last_good_frame_ms == 0) {
meter_age_ms = UINT32_MAX;
}
serial_debug_printf(
"diag: q_depth=%lu q_hi=%lu q_drop=%lu batch_q=%u build=%u ack_pending=%u ack_retry_cur=%u ack_retry_total=%lu ack_timeout_total=%lu ack_rtt_last_ms=%lu ack_rtt_ewma_ms=%lu meter_ok=%lu meter_fail=%lu meter_ovf=%lu meter_timeout=%lu meter_age_ms=%lu rx_win_ms=%lu sleep_ms=%lu",
static_cast<unsigned long>(queue_depth),
static_cast<unsigned long>(g_meter_queue_high_water),
static_cast<unsigned long>(g_meter_queue_drop_count),
static_cast<unsigned>(g_batch_count),
static_cast<unsigned>(g_build_count),
g_batch_ack_pending ? 1U : 0U,
static_cast<unsigned>(g_batch_retry_count),
static_cast<unsigned long>(g_sender_ack_retry_total),
static_cast<unsigned long>(g_sender_ack_timeout_total),
static_cast<unsigned long>(g_sender_ack_rtt_last_ms),
static_cast<unsigned long>(g_sender_ack_rtt_ewma_ms),
static_cast<unsigned long>(meter_stats.frames_ok),
static_cast<unsigned long>(meter_stats.frames_parse_fail),
static_cast<unsigned long>(meter_stats.rx_overflow),
static_cast<unsigned long>(meter_stats.rx_timeout),
static_cast<unsigned long>(meter_age_ms),
static_cast<unsigned long>(g_sender_rx_window_ms),
static_cast<unsigned long>(g_sender_sleep_ms));
}
static uint8_t bit_count32(uint32_t value) { static uint8_t bit_count32(uint32_t value) {
uint8_t count = 0; uint8_t count = 0;
while (value != 0) { while (value != 0) {
@@ -257,11 +323,17 @@ static void meter_queue_push_latest(const MeterSampleEvent &event) {
return; return;
} }
if (xQueueSend(g_meter_sample_queue, &event, 0) == pdTRUE) { if (xQueueSend(g_meter_sample_queue, &event, 0) == pdTRUE) {
update_meter_queue_high_water();
return; return;
} }
g_meter_queue_drop_count++;
MeterSampleEvent dropped = {}; MeterSampleEvent dropped = {};
xQueueReceive(g_meter_sample_queue, &dropped, 0); xQueueReceive(g_meter_sample_queue, &dropped, 0);
if (xQueueSend(g_meter_sample_queue, &event, 0) != pdTRUE && SERIAL_DEBUG_MODE) { if (xQueueSend(g_meter_sample_queue, &event, 0) == pdTRUE) {
update_meter_queue_high_water();
return;
}
if (SERIAL_DEBUG_MODE) {
serial_debug_printf("meter: queue push failed"); serial_debug_printf("meter: queue push failed");
} }
} }
@@ -1157,17 +1229,7 @@ static void sender_loop() {
uint32_t now_ms = millis(); uint32_t now_ms = millis();
display_set_sender_queue(g_batch_count, g_build_count > 0); display_set_sender_queue(g_batch_count, g_build_count > 0);
display_set_sender_batches(g_last_acked_batch_id, g_batch_id); display_set_sender_batches(g_last_acked_batch_id, g_batch_id);
if (SERIAL_DEBUG_MODE && now_ms - g_last_debug_log_ms >= 5000) { sender_log_diagnostics(now_ms);
g_last_debug_log_ms = now_ms;
serial_debug_printf("state: Q=%u%s A=%u C=%u inflight=%u ack_pending=%u retries=%u",
g_batch_count,
g_build_count > 0 ? "+" : "",
g_last_acked_batch_id,
g_batch_id,
g_inflight_count,
g_batch_ack_pending ? 1 : 0,
g_batch_retry_count);
}
meter_reader_pump(now_ms); meter_reader_pump(now_ms);
@@ -1333,6 +1395,12 @@ static void sender_loop() {
uint32_t ack_epoch = read_u32_be(&ack_pkt.payload[3]); uint32_t ack_epoch = read_u32_be(&ack_pkt.payload[3]);
bool set_time = false; bool set_time = false;
if (g_batch_ack_pending && ack_id == g_last_sent_batch_id) { if (g_batch_ack_pending && ack_id == g_last_sent_batch_id) {
g_sender_ack_rtt_last_ms = rx_elapsed;
if (g_sender_ack_rtt_ewma_ms == 0) {
g_sender_ack_rtt_ewma_ms = rx_elapsed;
} else {
g_sender_ack_rtt_ewma_ms = (g_sender_ack_rtt_ewma_ms * 3U + rx_elapsed + 1U) / 4U;
}
if (time_valid == 1 && ack_epoch >= MIN_ACCEPTED_EPOCH_UTC) { if (time_valid == 1 && ack_epoch >= MIN_ACCEPTED_EPOCH_UTC) {
time_set_utc(ack_epoch); time_set_utc(ack_epoch);
g_time_acquired = true; g_time_acquired = true;
@@ -1363,8 +1431,10 @@ static void sender_loop() {
} }
if (g_batch_ack_pending && (now_ms - g_last_batch_send_ms >= g_batch_ack_timeout_ms)) { if (g_batch_ack_pending && (now_ms - g_last_batch_send_ms >= g_batch_ack_timeout_ms)) {
g_sender_ack_timeout_total++;
if (g_batch_retry_count < BATCH_MAX_RETRIES) { if (g_batch_retry_count < BATCH_MAX_RETRIES) {
g_batch_retry_count++; g_batch_retry_count++;
g_sender_ack_retry_total++;
serial_debug_printf("ack: timeout batch_id=%u retry=%u", g_inflight_batch_id, g_batch_retry_count); serial_debug_printf("ack: timeout batch_id=%u retry=%u", g_inflight_batch_id, g_batch_retry_count);
resend_inflight_batch(last_sample_ts()); resend_inflight_batch(last_sample_ts());
} else { } else {

View File

@@ -24,6 +24,7 @@ static uint32_t g_frames_parse_fail = 0;
static uint32_t g_rx_overflow = 0; static uint32_t g_rx_overflow = 0;
static uint32_t g_rx_timeout = 0; static uint32_t g_rx_timeout = 0;
static uint32_t g_last_log_ms = 0; static uint32_t g_last_log_ms = 0;
static uint32_t g_last_good_frame_ms = 0;
static constexpr uint32_t METER_FIXED_FRAC_MAX_DIV = 10000; static constexpr uint32_t METER_FIXED_FRAC_MAX_DIV = 10000;
void meter_init() { void meter_init() {
@@ -251,6 +252,16 @@ static void meter_debug_log() {
static_cast<unsigned long>(g_bytes_rx)); static_cast<unsigned long>(g_bytes_rx));
} }
void meter_get_stats(MeterDriverStats &out) {
out.frames_ok = g_frames_ok;
out.frames_parse_fail = g_frames_parse_fail;
out.rx_overflow = g_rx_overflow;
out.rx_timeout = g_rx_timeout;
out.bytes_rx = g_bytes_rx;
out.last_rx_ms = g_last_rx_ms;
out.last_good_frame_ms = g_last_good_frame_ms;
}
bool meter_poll_frame(const char *&frame, size_t &len) { bool meter_poll_frame(const char *&frame, size_t &len) {
frame = nullptr; frame = nullptr;
len = 0; len = 0;
@@ -332,6 +343,7 @@ bool meter_parse_frame(const char *frame, size_t len, MeterData &data) {
data.valid = energy_ok || total_p_ok || p1_ok || p2_ok || p3_ok; data.valid = energy_ok || total_p_ok || p1_ok || p2_ok || p3_ok;
if (data.valid) { if (data.valid) {
g_frames_ok++; g_frames_ok++;
g_last_good_frame_ms = millis();
} else { } else {
g_frames_parse_fail++; g_frames_parse_fail++;
} }
@@ -343,6 +355,7 @@ bool meter_parse_frame(const char *frame, size_t len, MeterData &data) {
data.valid = energy_ok || total_p_ok || p1_ok || p2_ok || p3_ok; data.valid = energy_ok || total_p_ok || p1_ok || p2_ok || p3_ok;
if (data.valid) { if (data.valid) {
g_frames_ok++; g_frames_ok++;
g_last_good_frame_ms = millis();
} else { } else {
g_frames_parse_fail++; g_frames_parse_fail++;
} }
@@ -402,6 +415,7 @@ bool meter_parse_frame(const char *frame, size_t len, MeterData &data) {
if (energy_ok && total_p_ok && p1_ok && p2_ok && p3_ok && data.meter_seconds_valid) { if (energy_ok && total_p_ok && p1_ok && p2_ok && p3_ok && data.meter_seconds_valid) {
data.valid = true; data.valid = true;
g_frames_ok++; g_frames_ok++;
g_last_good_frame_ms = millis();
return true; return true;
} }
@@ -416,6 +430,7 @@ bool meter_parse_frame(const char *frame, size_t len, MeterData &data) {
data.valid = got_any; data.valid = got_any;
if (data.valid) { if (data.valid) {
g_frames_ok++; g_frames_ok++;
g_last_good_frame_ms = millis();
} else { } else {
g_frames_parse_fail++; g_frames_parse_fail++;
} }