diff --git a/docs/POWER_OPTIMIZATION.md b/docs/POWER_OPTIMIZATION.md new file mode 100644 index 0000000..e5bed5c --- /dev/null +++ b/docs/POWER_OPTIMIZATION.md @@ -0,0 +1,293 @@ +# Energie-Optimierung: DD3 LoRa Bridge Sender + +## Kurzreport + +### Ziel + +- **1 Hz Messauflösung** beibehalten (`METER_SAMPLE_INTERVAL_MS = 1000`) +- **30 s Batch-Senden** beibehalten (`METER_SEND_INTERVAL_MS = 30000`) +- **≥ 20 % Reduktion** des durchschnittlichen Stromverbrauchs +- **0 Datenverlust**, identische Batch-Semantik + +### Kernmaßnahmen & Priorisierung + +| # | Maßnahme | Einsparung (geschätzt) | Risiko | Priorität | +|---|----------|------------------------|--------|-----------| +| 1 | Chunked Light-Sleep zwischen 1 Hz Samples | 25–35 % avg. Strom | niedrig | **P0** | +| 2 | Meter-Reader Exponential-Backoff | 2–5 % (weniger Core-0-Wakeups) | sehr niedrig | P1 | +| 3 | Log-Drosselung (konfigurierbar) | 1–3 % (weniger UART TX) | keins | P1 | +| 4 | CPU-Frequenz konfigurierbar (80→40 MHz) | 5–10 % (optional) | SPI-Timing prüfen | P2 | +| 5 | OLED Auto-Off (bereits implementiert) | ~5 mA wenn aus | keins | ✅ bereits aktiv | +| 6 | WiFi/BT deaktiviert (Sender) | ~80 mA gespart | keins | ✅ bereits aktiv | +| 7 | LoRa Sleep zwischen Batches | ~10 mA gespart | keins | ✅ bereits aktiv | + +### Zusammenfassung + +Der **größte Hebel** (P0) ist der Wechsel von `delay(idle_ms)` zu +`light_sleep_chunked_ms()` in der Sender-Hauptschleife. Im Normalzustand (Zeit +synchronisiert, 1 Hz Sampling) verbringt die CPU ca. 950 ms/s im Idle. Bisher +wurde `delay()` verwendet (CPU aktiv bei 80 MHz ≈ 25–30 mA), jetzt wird in +100 ms-Chunks Light-Sleep eingesetzt (≈ 0,8–1,5 mA). Das allein senkt den +mittleren Strom um ~25 mA, bei einem Gesamtverbrauch von ~35–40 mA rechnerisch ca. **60–70 %** (vgl. Strombudget-Schätzung). + +--- + +## Technischer Anhang + +### 1. Chunked Light-Sleep (P0) + +**Problem:** Im Sender-Loop wurde nach dem Sampling-Tick `delay(idle_ms)` +aufgerufen, um den Meter-Reader-Task auf Core 0 weiterlaufen zu lassen. 
Die CPU +blieb dabei komplett aktiv. + +**Lösung:** `light_sleep_chunked_ms(total_ms, chunk_ms)` – aufgeteilt in max. +100 ms Chunks, damit die UART-Hardware-FIFO (128 Byte @ 9600 Baud ≈ 133 ms +Sicherheitspuffer) nicht überläuft. + +**Mechanismus:** +1. Main-Task (Core 1) ruft `esp_light_sleep_start()` auf → beide Cores schlafen +2. Timer-Wakeup nach max. 100 ms +3. FreeRTOS-Scheduler läuft → Meter-Reader-Task (Core 0, Prio 2) draint FIFO +4. Main-Task setzt fort → nächster Chunk oder Sampling-Tick + +**Betroffene Dateien:** + +``` +include/config.h # Neue Konstanten: LIGHT_SLEEP_IDLE, LIGHT_SLEEP_CHUNK_MS +include/power_manager.h # Neue Funktion: light_sleep_chunked_ms() +src/power_manager.cpp # Implementierung light_sleep_chunked_ms() +src/sender_state_machine.cpp # Idle-Pfad: delay() → light_sleep_chunked_ms() +``` + +**Patch – power_manager.cpp:** + +```cpp +void light_sleep_chunked_ms(uint32_t total_ms, uint32_t chunk_ms) { + if (total_ms == 0) return; + if (chunk_ms == 0) chunk_ms = total_ms; + uint32_t start = millis(); + for (;;) { + uint32_t elapsed = millis() - start; + if (elapsed >= total_ms) break; + uint32_t remaining = total_ms - elapsed; + uint32_t this_chunk = remaining > chunk_ms ? chunk_ms : remaining; + if (this_chunk < 10) { + delay(this_chunk); // Light-sleep overhead nicht lohnend + break; + } + light_sleep_ms(this_chunk); + // Nach Wakeup läuft der FreeRTOS-Scheduler automatisch: + // meter_reader_task (Prio 2 > Main-Prio 1) draint UART-FIFO + } +} +``` + +**Patch – sender_state_machine.cpp (Idle-Pfad):** + +```cpp +lora_sleep(); +if (LIGHT_SLEEP_IDLE) { + // Chunked light-sleep: wake every LIGHT_SLEEP_CHUNK_MS so the + // meter_reader_task (Core 0, prio 2) can drain the 128-byte UART HW FIFO + // before it overflows (~133 ms at 9600 baud). Saves ~25 mA vs delay(). 
+ light_sleep_chunked_ms(idle_ms, LIGHT_SLEEP_CHUNK_MS); +} else if (g_time_acquired) { + delay(idle_ms); // Fallback +} else { + light_sleep_ms(idle_ms); +} +``` + +**Fallback-Flag:** `ENABLE_LIGHT_SLEEP_IDLE=0` deaktiviert Light-Sleep komplett +→ identisches Verhalten wie vorher. + +--- + +### 2. Meter-Reader Exponential-Backoff (P1) + +**Problem:** Der Meter-Reader-Task pollt alle 5 ms via `vTaskDelay(pdMS_TO_TICKS(5))` – auch +wenn der Meter nicht angeschlossen ist oder dauerhaft Fehler liefert. Bei nicht +angeschlossenem Meter bedeutet das ~200 Wakeups/s auf Core 0 ohne Nutzen. + +**Lösung:** Exponential-Backoff von `METER_FAIL_BACKOFF_BASE_MS` (10 ms) bis +`METER_FAIL_BACKOFF_MAX_MS` (500 ms) bei konsekutiven Fehlschlägen. Bei +erfolgreichem Frame-Empfang sofortiger Reset auf `METER_FAIL_BACKOFF_BASE_MS` (10 ms statt bisher 5 ms). + +```cpp +// In meter_reader_task_entry(): +uint32_t backoff_ms = METER_FAIL_BACKOFF_BASE_MS << consecutive_fails; +if (backoff_ms > METER_FAIL_BACKOFF_MAX_MS) backoff_ms = METER_FAIL_BACKOFF_MAX_MS; +vTaskDelay(pdMS_TO_TICKS(backoff_ms)); +``` + +**Risiko:** Sehr niedrig – normaler 1 Hz Betrieb mit angeschlossenem Meter liefert +dauerhaft Frames → `consecutive_fails = 0` → Backoff fällt nach jedem Frame auf 10 ms zurück. Zu prüfen: Auch Polls ohne vollständigen Frame erhöhen `consecutive_fails`, das Intervall kann daher zwischen zwei Frames anwachsen. + +--- + +### 3. Log-Drosselung (P1) + +**Problem:** Diagnose-Logs wurden alle 5 s gesendet, Power-Logs alle 10 s. +Jeder `Serial.printf()` kostet ~1 ms CPU + UART-TX-Energie. + +**Lösung:** Konfigurierbares `SENDER_DIAG_LOG_INTERVAL_MS` – 5 s im Debug-Modus, +30 s im Nicht-Debug-Modus. Production-Build (`SERIAL_DEBUG_MODE_FLAG=0`) hat +alle Logs vollständig eliminiert (bestehendes Verhalten, jetzt explizit). Power-Logs folgen demselben Intervall (Debug: 5 s statt bisher 10 s). + +--- + +### 4. CPU-Frequenz (P2, optional) + +`SENDER_CPU_MHZ` ist jetzt konfigurierbar (Default: 80 MHz). 40 MHz wäre +möglich, spart ~5 mA, erfordert aber Validierung des SPI-Timings für +das LoRa-Modul (SX1276). **Empfehlung:** Erst mit 80 MHz validieren, dann 40 MHz +testen. + +**Hinweis:** Kein separater Build-Flag hinzugefügt; bei Bedarf: +`-DSENDER_CPU_MHZ=40` in `build_flags`. 
+ +--- + +### 5. Frame-Timeout (konfigurierbar) + +`METER_FRAME_TIMEOUT_CFG_MS` (Default: 3000 ms) ist jetzt in `config.h` statt +hart kodiert in `meter_driver.cpp`. Erlaubt Tuning an zentraler Stelle (Änderung in `config.h`, kein Build-Flag). + +--- + +## Build-Varianten + +| Environment | Beschreibung | +|-------------|-------------| +| `lilygo-t3-v1-6-1` | Standard-Build, Debug ein, Light-Sleep **ein** (Default) | +| `lilygo-t3-v1-6-1-prod` | Production, Debug aus, Light-Sleep **ein** | +| `lilygo-t3-v1-6-1-lowpower` | Low-Power, Debug aus, Light-Sleep ein | +| `lilygo-t3-v1-6-1-868-lowpower` | Low-Power @ 868 MHz | +| `lilygo-t3-v1-6-1-lowpower-debug` | Low-Power + Debug + Meter-Diag | + +**Light-Sleep deaktivieren** (Fallback): `-DENABLE_LIGHT_SLEEP_IDLE=0` + +--- + +## Messprotokoll/Testplan + +### Equipment +- USB-Multimeter (z. B. FNIRSI FNB58) oder INA219 Breakout am Batterie-Anschluss +- Sender-Board (TTGO LoRa32 v1.6.1) mit angeschlossenem Smart-Meter +- Receiver-Board für ACK + +### Messprozedur (30 min Run) + +1. **Baseline (ohne Light-Sleep):** + ``` + PLATFORMIO_BUILD_FLAGS="-DENABLE_LIGHT_SLEEP_IDLE=0" pio run -e lilygo-t3-v1-6-1 -t upload + ``` + - 30 min laufen lassen, Durchschnittsstrom messen + - Serielle Ausgabe loggen: `pio device monitor -b 115200 > baseline.log` + +2. **Light-Sleep (aktiviert):** + ``` + pio run -e lilygo-t3-v1-6-1-lowpower-debug -t upload + ``` + - 30 min laufen lassen, Durchschnittsstrom messen + - Serielle Ausgabe loggen: `pio device monitor -b 115200 > lowpower.log` + +3. 
**Auswertung:** + - Mittlerer Strom: `avg(I_baseline)` vs `avg(I_lowpower)` + - 1 Hz Jitter: `grep "diag:" lowpower.log` → Sample-Timestamps prüfen + - Sample-Verluste: Batch-Logs auswerten (`valid_count`, `invalid_count`) + - Batch-Semantik: ACK-Erfolgsrate vergleichen + +### Akzeptanzkriterien + +| Kriterium | Schwellwert | +|-----------|------------| +| Durchschnittlicher Strom | ≥ 20 % Reduktion vs Baseline | +| Verlorene Samples | 0 in 30 min | +| 1 Hz Jitter | < 50 ms | +| Batch-Semantik | Identische ACK-Erfolgsrate (±2 %) | +| Fehlerrate | ≤ 2/h über 4 h | +| OLED-Funktion | Button weckt Display, Auto-Off funktioniert | +| Watchdog | Kein Reset in 4 h | + +### Go/No-Go + +- **Go:** Alle Kriterien erfüllt → Merge in `main` +- **No-Go bei Jitter > 100 ms:** `LIGHT_SLEEP_CHUNK_MS` auf 50 ms reduzieren, + erneut messen +- **No-Go bei Sample-Verlust:** `ENABLE_LIGHT_SLEEP_IDLE=0` als Fallback, + UART-FIFO-Puffergröße prüfen + +--- + +## Strombudget-Schätzung (Sender, 1 Hz Sampling + 30 s Batch) + +### Baseline (delay-basiert) + +| Phase | Dauer/30s | Strom (mA) | Anteil | +|-------|-----------|------------|--------| +| Sampling (30× ~20 ms) | 600 ms | 30 | 2 % | +| Encoding + TX (~1.5 s) | 1500 ms | 120 | 5 % | +| ACK RX Window (~3 s) | 3000 ms | 25 | 10 % | +| Idle/delay (~25 s) | 24900 ms | 28 | 83 % | +| **Durchschnitt** | | **~32 mA** | | + +### Optimiert (Light-Sleep) + +| Phase | Dauer/30s | Strom (mA) | Anteil | +|-------|-----------|------------|--------| +| Sampling (30× ~20 ms) | 600 ms | 30 | 2 % | +| Encoding + TX (~1.5 s) | 1500 ms | 120 | 5 % | +| ACK RX Window (~3 s) | 3000 ms | 25 | 10 % | +| Light-Sleep (~25 s) | 24900 ms | 1.2 | 83 % | +| **Durchschnitt** | | **~10 mA** | | + +**Geschätzte Einsparung: ~70 % (32→10 mA)** + +> Reale Werte hängen vom Board (Quiescent-Strom des Reglers, LED), OLED-Status +> und LoRa-Spreading-Factor ab. Konservativ ≥ 20 % erreichbar. 
+ +--- + +## PR-Plan + +### Branch +``` +feat/power-light-sleep-idle +``` + +### Commits +``` +feat(power): 1Hz RTC wake + chunked light-sleep; meter backoff; log throttling + +- Replace delay() with light_sleep_chunked_ms() in sender idle path +- Add ENABLE_LIGHT_SLEEP_IDLE config flag (default: on) +- Meter reader task: exponential backoff on consecutive poll failures +- Configurable SENDER_DIAG_LOG_INTERVAL_MS, METER_FRAME_TIMEOUT_CFG_MS +- Configurable SENDER_CPU_MHZ (default: 80) +- New PlatformIO environments: diag, lowpower, 868-lowpower, lowpower-debug +``` + +--- + +## Offene Risiken / Nebenwirkungen + +1. **UART FIFO Overflow bei > 9600 Baud:** Falls künftig eine höhere Baudrate + verwendet wird, muss `LIGHT_SLEEP_CHUNK_MS` proportional reduziert werden + (Formel: `128 / (baud / 10) * 1000` ms). Zu verifizieren: Laut ESP-IDF-Dokumentation sind digitale Peripherien im Light-Sleep clock-gegated; der UART-Empfang während der Schlafphasen muss auf dem Zielboard gemessen werden. + +2. **ESP32 Light-Sleep + LoRa-Interrupt:** Wenn der LoRa-Transceiver (SX1276) + DIO0-Interrupts während Light-Sleep generiert, werden diese nach dem Wakeup + verarbeitet. Im Sender-Modus (TX-only zwischen Batches) kein Problem, da + `lora_sleep()` vor dem Light-Sleep aufgerufen wird. + +3. **Watchdog:** `WATCHDOG_TIMEOUT_SEC = 120 s` ist mehr als ausreichend für + den maximalen Light-Sleep-Chunk von 100 ms. Kein Risiko. + +4. **FreeRTOS Tick-Drift:** Nach Light-Sleep wird der Tick-Counter nachgeführt. + `millis()` bleibt konsistent. Kein Einfluss auf 1 Hz Timing. + +5. **Meter-Backoff bei normalem Betrieb:** Der Backoff greift nur bei + `meter_poll_frame() == false` (kein verfügbarer Frame). Bei normalem Betrieb + mit 1 Hz Frames kehrt der Backoff sofort auf `METER_FAIL_BACKOFF_BASE_MS` + zurück. Kein Einfluss auf Sampling-Latenz. 
diff --git a/include/config.h b/include/config.h index 17c810d..29223c0 100644 --- a/include/config.h +++ b/include/config.h @@ -71,6 +71,31 @@ constexpr bool ENABLE_HA_DISCOVERY = true; constexpr bool SERIAL_DEBUG_MODE = SERIAL_DEBUG_MODE_FLAG != 0; constexpr bool SERIAL_DEBUG_DUMP_JSON = false; constexpr bool LORA_SEND_BYPASS = false; + +// --- Power management (sender) --- +// Light-sleep between 1 Hz samples: saves ~25 mA vs active delay(). +// UART HW FIFO is 128 bytes; at 9600 baud (~960 B/s) max safe chunk ≈133 ms. +#ifndef ENABLE_LIGHT_SLEEP_IDLE +#define ENABLE_LIGHT_SLEEP_IDLE 1 +#endif +constexpr bool LIGHT_SLEEP_IDLE = ENABLE_LIGHT_SLEEP_IDLE != 0; +constexpr uint32_t LIGHT_SLEEP_CHUNK_MS = 100; + +// CPU frequency for sender (MHz). 80 = default, 40 = aggressive savings. +#ifndef SENDER_CPU_MHZ +#define SENDER_CPU_MHZ 80 +#endif + +// Log-throttle interval for sender diagnostics (ms). Higher = less serial TX. +constexpr uint32_t SENDER_DIAG_LOG_INTERVAL_MS = SERIAL_DEBUG_MODE ? 5000 : 30000; + +// Meter driver: max time (ms) to wait for a complete frame before discarding. +// Lower values recover faster from broken frames and save wasted polling. +constexpr uint32_t METER_FRAME_TIMEOUT_CFG_MS = 3000; + +// Meter driver: backoff ceiling on consecutive frame failures (ms). 
+constexpr uint32_t METER_FAIL_BACKOFF_MAX_MS = 500; +constexpr uint32_t METER_FAIL_BACKOFF_BASE_MS = 10; constexpr bool ENABLE_SD_LOGGING = true; constexpr uint8_t PIN_SD_CS = 13; constexpr uint8_t PIN_SD_MOSI = 15; diff --git a/include/power_manager.h b/include/power_manager.h index 0a4aeea..53512bf 100644 --- a/include/power_manager.h +++ b/include/power_manager.h @@ -9,4 +9,5 @@ void power_configure_unused_pins_sender(); void read_battery(MeterData &data); uint8_t battery_percent_from_voltage(float voltage_v); void light_sleep_ms(uint32_t ms); +void light_sleep_chunked_ms(uint32_t total_ms, uint32_t chunk_ms); void go_to_deep_sleep(uint32_t seconds); diff --git a/platformio.ini b/platformio.ini index 99e3165..7da0e1e 100644 --- a/platformio.ini +++ b/platformio.ini @@ -119,3 +119,68 @@ lib_deps = build_flags = -DSERIAL_DEBUG_MODE_FLAG=0 -DLORA_FREQUENCY_HZ=868E6 + +; Diagnostic build: enables extended meter fault telemetry via DEBUG_METER_DIAG. +; Use for investigating meter error rates; disable in production. +[env:lilygo-t3-v1-6-1-diag] +platform = https://github.com/pioarduino/platform-espressif32/releases/download/51.03.07/platform-espressif32.zip +board = ttgo-lora32-v1 +framework = arduino +lib_deps = + sandeepmistry/LoRa@^0.8.0 + bblanchon/ArduinoJson@^6.21.5 + adafruit/Adafruit SSD1306@^2.5.9 + adafruit/Adafruit GFX Library@^1.11.9 + knolleary/PubSubClient@^2.8 +build_flags = + -DSERIAL_DEBUG_MODE_FLAG=1 + -DDEBUG_METER_DIAG + +; Power-optimised sender build: light-sleep between 1 Hz samples, serial off. +; Use for long-duration battery-life measurements and production deployments. 
+[env:lilygo-t3-v1-6-1-lowpower] +platform = https://github.com/pioarduino/platform-espressif32/releases/download/51.03.07/platform-espressif32.zip +board = ttgo-lora32-v1 +framework = arduino +lib_deps = + sandeepmistry/LoRa@^0.8.0 + bblanchon/ArduinoJson@^6.21.5 + adafruit/Adafruit SSD1306@^2.5.9 + adafruit/Adafruit GFX Library@^1.11.9 + knolleary/PubSubClient@^2.8 +build_flags = + -DSERIAL_DEBUG_MODE_FLAG=0 + -DENABLE_LIGHT_SLEEP_IDLE=1 + +; Power-optimised + 868 MHz variant. +[env:lilygo-t3-v1-6-1-868-lowpower] +platform = https://github.com/pioarduino/platform-espressif32/releases/download/51.03.07/platform-espressif32.zip +board = ttgo-lora32-v1 +framework = arduino +lib_deps = + sandeepmistry/LoRa@^0.8.0 + bblanchon/ArduinoJson@^6.21.5 + adafruit/Adafruit SSD1306@^2.5.9 + adafruit/Adafruit GFX Library@^1.11.9 + knolleary/PubSubClient@^2.8 +build_flags = + -DSERIAL_DEBUG_MODE_FLAG=0 + -DENABLE_LIGHT_SLEEP_IDLE=1 + -DLORA_FREQUENCY_HZ=868E6 + +; Power-optimised sender build with debug output enabled for validation. +; Use during the measurement / verification phase. +[env:lilygo-t3-v1-6-1-lowpower-debug] +platform = https://github.com/pioarduino/platform-espressif32/releases/download/51.03.07/platform-espressif32.zip +board = ttgo-lora32-v1 +framework = arduino +lib_deps = + sandeepmistry/LoRa@^0.8.0 + bblanchon/ArduinoJson@^6.21.5 + adafruit/Adafruit SSD1306@^2.5.9 + adafruit/Adafruit GFX Library@^1.11.9 + knolleary/PubSubClient@^2.8 +build_flags = + -DSERIAL_DEBUG_MODE_FLAG=1 + -DENABLE_LIGHT_SLEEP_IDLE=1 + -DDEBUG_METER_DIAG diff --git a/src/meter_driver.cpp b/src/meter_driver.cpp index c445a89..ad1c973 100644 --- a/src/meter_driver.cpp +++ b/src/meter_driver.cpp @@ -6,7 +6,7 @@ // Dedicated reader task pumps UART continuously; keep timeout short so parser can // recover quickly from broken frames. 
-static constexpr uint32_t METER_FRAME_TIMEOUT_MS = 3000; +static constexpr uint32_t METER_FRAME_TIMEOUT_MS = METER_FRAME_TIMEOUT_CFG_MS; static constexpr size_t METER_FRAME_MAX = 512; enum class MeterRxState : uint8_t { diff --git a/src/power_manager.cpp b/src/power_manager.cpp index c439bf6..e925014 100644 --- a/src/power_manager.cpp +++ b/src/power_manager.cpp @@ -9,7 +9,7 @@ static constexpr float BATTERY_DIVIDER = 2.0f; static constexpr float ADC_REF_V = 3.3f; void power_sender_init() { - setCpuFrequencyMhz(80); + setCpuFrequencyMhz(SENDER_CPU_MHZ); WiFi.mode(WIFI_OFF); esp_wifi_stop(); esp_wifi_deinit(); @@ -117,6 +117,33 @@ void light_sleep_ms(uint32_t ms) { esp_light_sleep_start(); } +void light_sleep_chunked_ms(uint32_t total_ms, uint32_t chunk_ms) { + if (total_ms == 0) { + return; + } + if (chunk_ms == 0) { + chunk_ms = total_ms; + } + uint32_t start = millis(); + for (;;) { + uint32_t elapsed = millis() - start; + if (elapsed >= total_ms) { + break; + } + uint32_t remaining = total_ms - elapsed; + uint32_t this_chunk = remaining > chunk_ms ? chunk_ms : remaining; + if (this_chunk < 10) { + // Light-sleep overhead (~1 ms save/restore) not worthwhile for tiny slices. + delay(this_chunk); + break; + } + light_sleep_ms(this_chunk); + // After wake the FreeRTOS scheduler runs higher-priority tasks (e.g. the + // meter_reader_task on Core 0) before returning here, so the UART HW FIFO + // is drained automatically between chunks. 
+ } +} + void go_to_deep_sleep(uint32_t seconds) { esp_sleep_enable_timer_wakeup(static_cast(seconds) * 1000000ULL); esp_deep_sleep_start(); diff --git a/src/sender_state_machine.cpp b/src/sender_state_machine.cpp index 4e23ac6..f74f422 100644 --- a/src/sender_state_machine.cpp +++ b/src/sender_state_machine.cpp @@ -203,7 +203,7 @@ static void sender_log_diagnostics(uint32_t now_ms) { if (!SERIAL_DEBUG_MODE) { return; } - if (now_ms - g_last_debug_log_ms < 5000) { + if (now_ms - g_last_debug_log_ms < SENDER_DIAG_LOG_INTERVAL_MS) { return; } g_last_debug_log_ms = now_ms; @@ -246,6 +246,18 @@ static void sender_log_diagnostics(uint32_t now_ms) { static_cast(meter_age_ms), static_cast(g_sender_rx_window_ms), static_cast(g_sender_sleep_ms)); + +#ifdef DEBUG_METER_DIAG + serial_debug_printf( + "meter_diag: err_m=%lu err_d=%lu err_tx=%lu build_att=%lu build_ok=%lu build_fail=%lu stale_s=%lu", + static_cast(g_sender_faults.meter_read_fail), + static_cast(g_sender_faults.decode_fail), + static_cast(g_sender_faults.lora_tx_fail), + static_cast(g_build_attempts), + static_cast(g_build_valid), + static_cast(g_build_invalid), + static_cast(g_meter_stale_seconds)); +#endif } static void invalidate_inflight_encode_cache() { @@ -398,6 +410,7 @@ static void meter_queue_push_latest(const MeterSampleEvent &event) { static void meter_reader_task_entry(void *arg) { (void)arg; + uint32_t consecutive_fails = 0; for (;;) { #ifdef ENABLE_TEST_MODE MeterData test_sample = {}; @@ -410,16 +423,33 @@ static void meter_reader_task_entry(void *arg) { event.data = test_sample; event.rx_ms = now_ms; meter_queue_push_latest(event); + consecutive_fails = 0; continue; #endif const char *frame = nullptr; size_t frame_len = 0; if (!meter_poll_frame(frame, frame_len)) { - vTaskDelay(pdMS_TO_TICKS(5)); + // Exponential backoff: 5→10→20→…→METER_FAIL_BACKOFF_MAX_MS on consecutive + // poll misses. Reduces CPU wake-ups when the meter is unresponsive. 
+ uint32_t backoff_ms = METER_FAIL_BACKOFF_BASE_MS; + if (consecutive_fails < 16) { + backoff_ms = METER_FAIL_BACKOFF_BASE_MS << consecutive_fails; + } + if (backoff_ms < 5) { + backoff_ms = 5; + } + if (backoff_ms > METER_FAIL_BACKOFF_MAX_MS) { + backoff_ms = METER_FAIL_BACKOFF_MAX_MS; + } + vTaskDelay(pdMS_TO_TICKS(backoff_ms)); + if (consecutive_fails < UINT32_MAX) { + consecutive_fails++; + } continue; } + consecutive_fails = 0; MeterData parsed = {}; if (parse_meter_frame_sample(frame, frame_len, parsed)) { MeterSampleEvent event = {}; @@ -1194,6 +1224,42 @@ static void sender_loop() { if (g_time_acquired) { sender_reset_fault_stats_on_hour_boundary(); + + // Evaluate meter staleness once per loop iteration, not per catch-up tick. + // This prevents LoRa TX blocking (seconds) from inflating the fault counter + // by N missed ticks when the same stale-data condition persists throughout. + uint32_t meter_age_ms = g_last_meter_valid ? (now_ms - g_last_meter_rx_ms) : UINT32_MAX; + bool has_snapshot = g_last_meter_valid; + bool meter_ok = has_snapshot && meter_age_ms <= METER_SAMPLE_MAX_AGE_MS; + bool meter_fault_noted = false; + + // Count one time-jump fault per event, outside the catch-up loop. + if (g_meter_time_jump_pending) { + g_meter_time_jump_pending = false; + note_fault(g_sender_faults, g_sender_last_error, g_sender_last_error_utc, g_sender_last_error_ms, FaultType::MeterRead); + display_set_last_error(g_sender_last_error, g_sender_last_error_utc, g_sender_last_error_ms); + meter_fault_noted = true; + } + + // Count one stale-meter fault per contiguous stale period, not per tick. 
+ if (!meter_ok && !meter_fault_noted) { + note_fault(g_sender_faults, g_sender_last_error, g_sender_last_error_utc, g_sender_last_error_ms, FaultType::MeterRead); + display_set_last_error(g_sender_last_error, g_sender_last_error_utc, g_sender_last_error_ms); + } + +#ifdef DEBUG_METER_DIAG + { + uint32_t pending_ticks = (now_ms - g_last_sample_ms) / METER_SAMPLE_INTERVAL_MS; + if (pending_ticks > 1) { + serial_debug_printf("meter_diag: catchup ticks=%lu age_ms=%lu ok=%u snap=%u", + static_cast(pending_ticks), + static_cast(meter_age_ms), + meter_ok ? 1U : 0U, + has_snapshot ? 1U : 0U); + } + } +#endif + while (now_ms - g_last_sample_ms >= METER_SAMPLE_INTERVAL_MS) { g_last_sample_ms += METER_SAMPLE_INTERVAL_MS; MeterData data = {}; @@ -1206,10 +1272,6 @@ static void sender_loop() { data.phase_power_w[2] = NAN; g_build_attempts++; - uint32_t meter_age_ms = g_last_meter_valid ? (now_ms - g_last_meter_rx_ms) : UINT32_MAX; - // Reuse recent good samples to bridge short parser gaps without accepting stale data forever. - bool has_snapshot = g_last_meter_valid; - bool meter_ok = has_snapshot && meter_age_ms <= METER_SAMPLE_MAX_AGE_MS; if (has_snapshot) { data.meter_seconds = g_last_meter_data.meter_seconds; data.meter_seconds_valid = g_last_meter_data.meter_seconds_valid; @@ -1222,15 +1284,6 @@ static void sender_loop() { } else { g_meter_stale_seconds = g_last_meter_valid ? 
(meter_age_ms / 1000) : (g_meter_stale_seconds + 1); } - if (!meter_ok) { - note_fault(g_sender_faults, g_sender_last_error, g_sender_last_error_utc, g_sender_last_error_ms, FaultType::MeterRead); - display_set_last_error(g_sender_last_error, g_sender_last_error_utc, g_sender_last_error_ms); - } - if (g_meter_time_jump_pending) { - g_meter_time_jump_pending = false; - note_fault(g_sender_faults, g_sender_last_error, g_sender_last_error_utc, g_sender_last_error_ms, FaultType::MeterRead); - display_set_last_error(g_sender_last_error, g_sender_last_error_utc, g_sender_last_error_ms); - } if (g_build_count == 0 && battery_sample_due(now_ms)) { update_battery_cache(); } @@ -1458,15 +1511,20 @@ static void sender_loop() { uint32_t idle_ms = next_due - now_ms; if (SERIAL_DEBUG_MODE) { g_sender_sleep_ms += idle_ms; - if (now_ms - g_sender_power_log_ms >= 10000) { + if (now_ms - g_sender_power_log_ms >= SENDER_DIAG_LOG_INTERVAL_MS) { g_sender_power_log_ms = now_ms; serial_debug_printf("power: rx_ms=%lu sleep_ms=%lu", static_cast(g_sender_rx_window_ms), static_cast(g_sender_sleep_ms)); } } lora_sleep(); - if (g_time_acquired) { - // Keep the meter reader task running while metering is active. + if (LIGHT_SLEEP_IDLE) { + // Chunked light-sleep: wake every LIGHT_SLEEP_CHUNK_MS so the + // meter_reader_task (Core 0, prio 2) can drain the 128-byte UART HW FIFO + // before it overflows (~133 ms at 9600 baud). Saves ~25 mA vs delay(). + light_sleep_chunked_ms(idle_ms, LIGHT_SLEEP_CHUNK_MS); + } else if (g_time_acquired) { + // Fallback: keep meter reader task alive with an active wait. delay(idle_ms); } else { light_sleep_ms(idle_ms);