Browse Source

system: set unstable flag when hwdt reset happens

- reduce the number of resets that trigger unstable mode
- introduce a reboot reason for stability counter
  which only gets handled on soft-reset
- allow to forcibly change to either stable or unstable mode
  more explicit terminal commands to leave or enter either mode
pull/2534/head
Maxim Prokhorov 1 year ago
parent
commit
f4951d7228
5 changed files with 114 additions and 54 deletions
  1. +2
    -2
      code/espurna/board.cpp
  2. +1
    -1
      code/espurna/config/general.h
  3. +83
    -34
      code/espurna/system.cpp
  4. +12
    -9
      code/espurna/system.h
  5. +16
    -8
      code/espurna/terminal.cpp

+ 2
- 2
code/espurna/board.cpp View File

@ -454,8 +454,8 @@ void boardSetup() {
if (std::memcmp(&page[ConfigOffset], &Reference[0], Reference.size()) != 0) {
DEBUG_MSG_P(PSTR("[BOARD] Invalid SDK config at 0x%08X, resetting...\n"), Address + ConfigOffset);
customResetReason(CustomResetReason::Factory);
eraseSDKConfig();
__builtin_trap();
systemForceStable();
forceEraseSDKConfig();
// can't return!
}
}


+ 1
- 1
code/espurna/config/general.h View File

@ -222,7 +222,7 @@
#endif
#ifndef SYSTEM_CHECK_MAX
#define SYSTEM_CHECK_MAX 5 // After this many crashes on boot
#define SYSTEM_CHECK_MAX 3 // After this many crashes on boot
// the system is flagged as unstable
#endif


+ 83
- 34
code/espurna/system.cpp View File

@ -251,6 +251,9 @@ String serialize(CustomResetReason reason) {
case CustomResetReason::Web:
ptr = F("Reboot from web interface");
break;
case CustomResetReason::Stability:
ptr = F("Reboot after changing stability counter");
break;
}
return String(ptr);
@ -339,13 +342,37 @@ bool flag { true };
} // namespace internal
// system_get_rst_info() result is cached by the Core init for internal use
// Returns the `reason` field of that cached `resetInfo` record; callers in
// this file compare it against the REASON_* constants.
uint32_t system_reason() {
return resetInfo.reason;
}
// prunes custom reason after accessing it once
//
// Returns the custom reset reason kept in internal::persistent_data.
// The value is computed only on the first call (function-local static
// initialized by an immediately-invoked lambda) and every later call returns
// that same cached value.
CustomResetReason customReason() {
static const CustomResetReason reason = ([]() {
// when persistent_data converts to false, fall back to None
// instead of reading the stored value
const auto out = static_cast<bool>(internal::persistent_data)
? internal::persistent_data.reason()
: CustomResetReason::None;
// stored reason is overwritten with None in either case, *after* it was read
internal::persistent_data.reason(CustomResetReason::None);
return out;
})();
return reason;
}
// Stores `reason` as the custom reset reason in internal::persistent_data.
// Does not touch the value already cached by the customReason() getter.
void customReason(CustomResetReason reason) {
internal::persistent_data.reason(reason);
}
#if SYSTEM_CHECK_ENABLED
namespace stability {
namespace build {
static constexpr uint8_t ChecksMin { 0 };
static constexpr uint8_t ChecksMin { 1 };
static constexpr uint8_t ChecksMax { SYSTEM_CHECK_MAX };
static constexpr uint8_t ChecksIncrement { 1 };
static_assert(ChecksMax > 1, "");
static_assert(ChecksMin < ChecksMax, "");
@ -354,20 +381,57 @@ static_assert(CheckTime > espurna::duration::Seconds::min(), "");
} // namespace build
// Forces the 'stable' state: writes build::ChecksMin as the persisted
// counter value and sets internal::flag to true.
// NOTE(review): internal::flag is presumably the value reported by
// stability::check() -- confirm against its definition.
void force_stable() {
internal::persistent_data.counter(build::ChecksMin);
internal::flag = true;
}
// Forces the 'unstable' state: writes build::ChecksMax as the persisted
// counter value and sets internal::flag to false.
// NOTE(review): internal::flag is presumably the value reported by
// stability::check() -- confirm against its definition.
void force_unstable() {
internal::persistent_data.counter(build::ChecksMax);
internal::flag = false;
}
// Returns the persisted stability counter.
// When internal::persistent_data converts to false, the stored value is
// not read and build::ChecksMin is returned instead.
uint8_t counter() {
return static_cast<bool>(internal::persistent_data)
? internal::persistent_data.counter()
: build::ChecksMin;
}
void init() {
// on cold boot / rst, bumps count to 2 so we don't end up
// spamming crash recorder in case something goes wrong
const auto count = static_cast<bool>(internal::persistent_data)
? internal::persistent_data.counter() : 1u;
const auto count = counter();
switch (system_reason()) {
// initial boot and rst are probably just fine
case REASON_DEFAULT_RST:
case REASON_EXT_SYS_RST:
force_stable();
return;
// no point stalling, we are probably stuck somewhere
case REASON_WDT_RST:
force_unstable();
return;
// when counter gets changed manually
case REASON_SOFT_RESTART:
if (customReason() == CustomResetReason::Stability) {
internal::flag = (count < build::ChecksMax);
return;
}
break;
}
// bump counter value and persist. if we re-enter with maximum
// once more, system is flagged as unstable.
// so, we simply wait for the timer to reset back to minimum
// and start the cycle again
const auto next = std::min(build::ChecksMax,
static_cast<uint8_t>(count + build::ChecksIncrement));
internal::persistent_data.counter(next);
internal::flag = (count < build::ChecksMax);
internal::timer.once_scheduled(build::CheckTime.count(), []() {
DEBUG_MSG_P(PSTR("[MAIN] Resetting stability counter\n"));
internal::persistent_data.counter(build::ChecksMin);
});
const auto next = count + 1u;
internal::persistent_data.counter((next > build::ChecksMax) ? count : next);
}
bool check() {
@ -377,28 +441,6 @@ bool check() {
} // namespace stability
#endif
// system_get_rst_info() result is cached by the Core init for internal use
uint32_t system_reason() {
return resetInfo.reason;
}
// prunes custom reason after accessing it once
CustomResetReason customReason() {
static const CustomResetReason reason = ([]() {
const auto out = static_cast<bool>(internal::persistent_data)
? internal::persistent_data.reason()
: CustomResetReason::None;
internal::persistent_data.reason(CustomResetReason::None);
return out;
})();
return reason;
}
void customReason(CustomResetReason reason) {
internal::persistent_data.reason(reason);
}
} // namespace boot
// -----------------------------------------------------------------------------
@ -688,6 +730,8 @@ void init() {
pushOnce([](Mask) {
if (!espurna::boot::stability::check()) {
DEBUG_MSG_P(PSTR("[MAIN] System UNSTABLE\n"));
} else if (espurna::boot::internal::timer.active()) {
DEBUG_MSG_P(PSTR("[MAIN] Pending stability counter reset...\n"));
}
return true;
});
@ -767,12 +811,9 @@ void reset(CustomResetReason reason) {
// ```
// triggered in SYS, might not always result in a clean reboot b/c of expected suspend
// triggered in CONT *should* end up never returning back and loop might not be needed
// (but, try to force swdt reboot in case it somehow happens)
[[noreturn]] void reset() {
ESP.restart();
for (;;) {
delay(100);
}
__builtin_trap();
}
// 'simple' reboot call with software controlled time
@ -916,6 +957,14 @@ void systemStabilityCounter(uint8_t count) {
espurna::boot::internal::persistent_data.counter(count);
}
// Global-namespace entry point; forwards to
// espurna::boot::stability::force_unstable().
void systemForceUnstable() {
espurna::boot::stability::force_unstable();
}
// Global-namespace entry point; forwards to
// espurna::boot::stability::force_stable().
void systemForceStable() {
espurna::boot::stability::force_stable();
}
// Global-namespace entry point; returns the result of
// espurna::boot::stability::check(). A false result is what the boot-time
// callback in this file logs as "System UNSTABLE".
bool systemCheck() {
return espurna::boot::stability::check();
}


+ 12
- 9
code/espurna/system.h View File

@ -22,16 +22,17 @@ struct HeapStats {
enum class CustomResetReason : uint8_t {
None,
Button,
Factory,
Hardware,
Button, // button event action
Factory, // requested factory reset
Hardware, // driver event
Mqtt,
Ota,
Rpc,
Rule,
Scheduler,
Terminal,
Web
Ota, // successful ota
Rpc, // rpc (api) calls
Rule, // rpn rule operator action
Scheduler, // scheduled reset
Terminal, // terminal command action
Web, // webui action
Stability, // stable counter action
};
namespace espurna {
@ -300,6 +301,8 @@ uint32_t systemResetReason();
uint8_t systemStabilityCounter();
void systemStabilityCounter(uint8_t count);
void systemForceStable();
void systemForceUnstable();
bool systemCheck();
void customResetReason(CustomResetReason);


+ 16
- 8
code/espurna/terminal.cpp View File

@ -438,18 +438,26 @@ void _terminalInitCommands() {
});
terminalRegisterCommand(F("RESET"), [](::terminal::CommandContext&& ctx) {
auto count = 1;
if (ctx.argv.size() == 2) {
count = ctx.argv[1].toInt();
if (count < SYSTEM_CHECK_MAX) {
systemStabilityCounter(count);
}
}
terminalOK(ctx);
prepareReset(CustomResetReason::Terminal);
});
#if SYSTEM_CHECK_ENABLED
terminalRegisterCommand(F("STABLE"), [](::terminal::CommandContext&& ctx) {
systemForceStable();
prepareReset(CustomResetReason::Stability);
});
terminalRegisterCommand(F("TRAP"), [](::terminal::CommandContext&& ctx) {
__builtin_trap();
});
terminalRegisterCommand(F("UNSTABLE"), [](::terminal::CommandContext&& ctx) {
systemForceUnstable();
prepareReset(CustomResetReason::Stability);
});
#endif
terminalRegisterCommand(F("UPTIME"), [](::terminal::CommandContext&& ctx) {
ctx.output.printf_P(PSTR("uptime %s\n"), getUptime().c_str());
terminalOK(ctx);


Loading…
Cancel
Save