/*
|
|
|
|
Part of the TERMINAL MODULE
|
|
|
|
Copyright (C) 2016-2019 by Xose Pérez <xose dot perez at gmail dot com>
|
|
Copyright (C) 2020 by Maxim Prokhorov <prokhorov dot max at outlook dot com>
|
|
|
|
*/
|
|
|
|
#include <vector>
|
|
#include <cctype>
|
|
|
|
#include "terminal_parsing.h"
|
|
|
|
namespace espurna {
|
|
namespace terminal {
|
|
namespace parser {
|
|
|
|
// Human-readable name of the given parser error.
// Every label is stored through PSTR(); a value that matches none of
// the known enumerators yields an empty string.
String error(Error value) {
    switch (value) {
    case Error::Ok:
        return String(PSTR("Ok"));
    case Error::Uninitialized:
        return String(PSTR("Uninitialized"));
    case Error::Busy:
        return String(PSTR("Busy"));
    case Error::UnterminatedQuote:
        return String(PSTR("UnterminatedQuote"));
    case Error::InvalidEscape:
        return String(PSTR("InvalidEscape"));
    case Error::UnexpectedLineEnd:
        return String(PSTR("UnexpectedLineEnd"));
    case Error::NoSpaceAfterQuote:
        return String(PSTR("NoSpaceAfterQuote"));
    }

    // not one of the enumerators above
    return String();
}
|
|
|
|
namespace {
|
|
|
|
// Original code is part of the SDSLib 2.0 -- A C dynamic strings library
|
|
// - https://github.com/antirez/sds/blob/master/sds.c
|
|
// - https://github.com/antirez/redis/blob/unstable/src/networking.c
|
|
// Replaced with a stateful parser to avoid random look-ahead issues in the code,
|
|
// and really make sure we **never** go out of bounds of the given view.
|
|
// (e.g. when we want to parse only a part of a larger string)
|
|
|
|
// Helper functions to handle \xHH codes that could encode
|
|
// non-printable characters for commands or arguments
|
|
// True when `c` is one of 0-9, a-f or A-F, i.e. usable as a digit of the \xHH escape
bool is_hex_digit(char c) {
    const bool decimal = (c >= '0') && (c <= '9');
    const bool lower = (c >= 'a') && (c <= 'f');
    const bool upper = (c >= 'A') && (c <= 'F');

    return decimal || lower || upper;
}
|
|
|
|
// Numeric value (0...15) of a single hexadecimal digit, either lower or upper case.
// Anything that is not a hexadecimal digit is converted to 0.
char hex_digit_to_byte(char c) {
    if ((c >= '0') && (c <= '9')) {
        return c - '0';
    }

    if ((c >= 'a') && (c <= 'f')) {
        return 10 + (c - 'a');
    }

    if ((c >= 'A') && (c <= 'F')) {
        return 10 + (c - 'A');
    }

    return 0;
}
|
|
|
|
char hex_digit_to_value(char lhs, char rhs) {
|
|
return (hex_digit_to_byte(lhs) << 4) | hex_digit_to_byte(rhs);
|
|
}
|
|
|
|
// allowed 'special' input characters
|
|
// Translate the character following a backslash into the control character
// it stands for (n, r, t, b, a); every other character is returned unchanged
char unescape_char(char c) {
    struct Mapping {
        char letter;
        char control;
    };

    constexpr Mapping mappings[] {
        {'n', '\n'},
        {'r', '\r'},
        {'t', '\t'},
        {'b', '\b'},
        {'a', '\a'},
    };

    for (const auto& mapping : mappings) {
        if (mapping.letter == c) {
            return mapping.control;
        }
    }

    return c;
}
|
|
|
|
struct Result {
|
|
Result() = default;
|
|
|
|
Result& operator=(Error error) {
|
|
_error = error;
|
|
_argv.clear();
|
|
return *this;
|
|
}
|
|
|
|
Result& operator=(Argv&& argv) {
|
|
_argv = std::move(argv);
|
|
_error = Error::Ok;
|
|
return *this;
|
|
}
|
|
|
|
explicit operator bool() const {
|
|
return _error == Error::Ok;
|
|
}
|
|
|
|
Error error() const {
|
|
return _error;
|
|
}
|
|
|
|
CommandLine get() {
|
|
auto out = CommandLine{
|
|
.argv = std::move(_argv),
|
|
.error = _error };
|
|
|
|
_error = Error::Uninitialized;
|
|
return out;
|
|
}
|
|
|
|
private:
|
|
Error _error { Error::Uninitialized };
|
|
Argv _argv;
|
|
};
|
|
|
|
struct Parser {
|
|
Parser() = default;
|
|
Result operator()(StringView);
|
|
|
|
private:
|
|
// only tracked within our `operator()(<LINE>)`
|
|
enum class State {
|
|
Done,
|
|
Initial,
|
|
Text,
|
|
CarriageReturn,
|
|
CarriageReturnAfterText,
|
|
SkipUntilNewLine,
|
|
EscapedText,
|
|
EscapedByteLhs,
|
|
EscapedByteRhs,
|
|
SingleQuote,
|
|
EscapedQuote,
|
|
DoubleQuote,
|
|
AfterQuote,
|
|
};
|
|
|
|
// our storage for
|
|
// - ARGV resulting list
|
|
// - text buffer or (interim) text span / range
|
|
// - escaped character (since we don't look ahead when iterating)
|
|
struct Values {
|
|
struct Span {
|
|
const char* begin { nullptr };
|
|
const char* end { nullptr };
|
|
};
|
|
|
|
Span span;
|
|
String chunk;
|
|
char byte_lhs { 0 };
|
|
|
|
Argv argv;
|
|
|
|
void append_span(const char* ptr) {
|
|
if (!span.begin) {
|
|
span.begin = ptr;
|
|
}
|
|
|
|
span.end = !span.end
|
|
? std::next(span.begin)
|
|
: std::next(ptr);
|
|
}
|
|
|
|
void push_span() {
|
|
if (span.begin && span.end) {
|
|
StringView view(span.begin, span.end);
|
|
chunk.concat(view.c_str(), view.length());
|
|
span = Values::Span{};
|
|
}
|
|
}
|
|
|
|
void append_chunk(char c) {
|
|
push_span();
|
|
chunk.concat(&c, 1);
|
|
}
|
|
|
|
void append_byte_lhs(char c) {
|
|
byte_lhs = c;
|
|
}
|
|
|
|
void append_byte_rhs(char c) {
|
|
append_chunk(hex_digit_to_value(byte_lhs, c));
|
|
}
|
|
|
|
void push_chunk() {
|
|
push_span();
|
|
argv.push_back(chunk);
|
|
chunk = "";
|
|
}
|
|
};
|
|
|
|
bool _parsing { false };
|
|
};
|
|
|
|
// Split the `line` into a list of arguments in a single forward pass.
// Only characters between line.begin() and line.end() are ever read.
//
// - arguments are separated by spaces or tabs
// - line must end with either '\n' or '\r' followed by '\n'; parsing stops there
// - "double quoted" text may contain \n, \r, \t, \b, \a and \xHH escapes,
//   any other escaped character is used as-is
// - 'single quoted' text may only contain the \' escape
// - quoted text may directly follow plain text (e.g. ab"cd" is a single argument),
//   but closing quote must be followed by space, tab or the line ending
//
// Result contains the arguments when line ending was reached. Otherwise, it contains
// - Error::Busy when ReentryLock could not be initialized
//   (presumably parser is already in use; confirm with the ReentryLock implementation)
// - Error::UnexpectedLineEnd when line ending is missing, '\r' is not followed by '\n',
//   or line ends right after the backslash in double quotes
// - Error::UnterminatedQuote when line ends inside of the quoted text
// - Error::InvalidEscape for unknown escape in single quotes or malformed \xHH
// - Error::NoSpaceAfterQuote when closing quote is followed by some other text
Result Parser::operator()(StringView line) {
    Result result;
    Values values;

    State state { State::Initial };

    ReentryLock lock(_parsing);
    if (!lock.initialized()) {
        result = Error::Busy;
        goto out;
    }

    for (auto it = line.begin(); it != line.end(); ++it) {
        switch (state) {
        // skip whitespace until something resembling an argument appears
        case State::Initial:
            switch (*it) {
            case ' ':
            case '\t':
                break;
            case '\r':
                state = State::CarriageReturn;
                break;
            case '\n':
                state = State::Done;
                break;
            default:
                // current character already belongs to the argument
                state = State::Text;
                goto text;
            }
            break;

        // line ending was already seen, ignore the rest of the input
        case State::Done:
            goto out;

        case State::Text:
text:
            switch (*it) {
            case ' ':
            case '\t':
                values.push_chunk();
                state = State::Initial;
                break;
            // span must be flushed before entering quoted text. otherwise,
            // it would continue growing *through* the quote character
            // and the quote itself would end up in the argument
            case '"':
                values.push_span();
                state = State::DoubleQuote;
                break;
            case '\'':
                values.push_span();
                state = State::SingleQuote;
                break;
            case '\r':
                state = State::CarriageReturnAfterText;
                break;
            case '\n':
                values.push_chunk();
                state = State::Done;
                break;
            default:
                values.append_span(it);
                break;
            }
            break;

        // '\r' is only valid as part of the '\r' '\n' sequence
        case State::CarriageReturn:
            if ((*it) == '\n') {
                state = State::Done;
            } else {
                result = Error::UnexpectedLineEnd;
                goto out;
            }
            break;

        // same as above, but there is an argument pending
        case State::CarriageReturnAfterText:
            if ((*it) == '\n') {
                values.push_chunk();
                state = State::Done;
            } else {
                result = Error::UnexpectedLineEnd;
                goto out;
            }
            break;

        case State::SkipUntilNewLine:
            switch (*it) {
            case '\r':
                state = State::CarriageReturn;
                break;
            case '\n':
                state = State::Initial;
                break;
            }
            break;

        // character right after the backslash inside of double quotes
        case State::EscapedText: {
            switch (*it) {
            case '\r':
            case '\n':
                result = Error::UnexpectedLineEnd;
                goto out;
            case 'x':
                state = State::EscapedByteLhs;
                break;
            default:
                values.append_chunk(unescape_char(*it));
                // escape sequence is a single character, continue with the quoted text.
                // staying here would treat every following character as escaped
                state = State::DoubleQuote;
                break;
            }
            break;
        }

        // first digit of the \xHH
        case State::EscapedByteLhs:
            if (is_hex_digit(*it)) {
                values.append_byte_lhs(*it);
                state = State::EscapedByteRhs;
            } else {
                result = Error::InvalidEscape;
                goto out;
            }
            break;

        // second digit of the \xHH
        case State::EscapedByteRhs:
            if (is_hex_digit(*it)) {
                values.append_byte_rhs(*it);
                state = State::DoubleQuote;
            } else {
                result = Error::InvalidEscape;
                goto out;
            }
            break;

        case State::SingleQuote:
            switch (*it) {
            case '\r':
            case '\n':
                result = Error::UnterminatedQuote;
                goto out;
            case '\\':
                state = State::EscapedQuote;
                break;
            case '\'':
                state = State::AfterQuote;
                break;
            default:
                values.append_span(it);
                break;
            }
            break;

        // character right after the backslash inside of single quotes
        case State::EscapedQuote:
            switch (*it) {
            case '\'':
                // append_chunk() flushes pending span first. adding to the chunk
                // directly would place the quote in front of the preceding text
                // and keep the backslash as part of the span
                values.append_chunk(*it);
                state = State::SingleQuote;
                break;
            default:
                result = Error::InvalidEscape;
                goto out;
            }
            break;

        // closing quote must be followed by either whitespace or line ending
        case State::AfterQuote:
            switch (*it) {
            case '\r':
                state = State::CarriageReturnAfterText;
                break;
            case ' ':
            case '\t':
                values.push_chunk();
                state = State::Initial;
                break;
            case '\n':
                values.push_chunk();
                state = State::Done;
                break;
            default:
                result = Error::NoSpaceAfterQuote;
                goto out;
            }
            break;

        case State::DoubleQuote:
            switch (*it) {
            case '\r':
            case '\n':
                result = Error::UnterminatedQuote;
                goto out;
            case '"':
                state = State::AfterQuote;
                break;
            case '\\':
                state = State::EscapedText;
                break;
            default:
                values.append_span(it);
                break;
            }
            break;

        }
    }

out:
    if (state == State::Done) {
        result = std::move(values.argv);
    }

    // whenever line ends before we are done parsing, make sure
    // result contains a valid error condition (same as in the switch above)
    if (result.error() == Error::Uninitialized) {
        switch (state) {
        case State::Done:
            break;
        case State::CarriageReturn:
        case State::CarriageReturnAfterText:
        case State::Text:
        case State::Initial:
        case State::SkipUntilNewLine:
            result = Error::UnexpectedLineEnd;
            break;
        case State::EscapedByteLhs:
        case State::EscapedByteRhs:
        case State::EscapedText:
        case State::EscapedQuote:
            result = Error::InvalidEscape;
            break;
        case State::SingleQuote:
        case State::DoubleQuote:
            result = Error::UnterminatedQuote;
            break;
        case State::AfterQuote:
            result = Error::NoSpaceAfterQuote;
            break;
        }
    }

    return result;
}
|
|
|
|
// Parse the `line` using the Parser instance shared between all calls
CommandLine parse_line(StringView line) {
    static Parser parser;

    auto result = parser(line);
    return result.get();
}
|
|
|
|
} // namespace
|
|
|
|
// Fowler–Noll–Vo hash function to hash command strings that treats input as lowercase
|
|
// ref: https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
|
|
//
|
|
// This is here in case `std::unordered_map` becomes viable
|
|
// TODO: afaik, map implementation should handle collisions (however rare they are in our case)
|
|
// if not, we can always roll static commands allocation and just match strings with strcmp_P
|
|
|
|
// 32bit hash of the `value`, where every byte goes through tolower() first.
// Bytes are read with pgm_read_byte().
// Note that the byte is XOR-ed *before* multiplying by the prime,
// which is the order the reference above lists for the FNV-1a variant.
uint32_t lowercase_fnv1_hash(StringView value) {
    constexpr uint32_t fnv_prime = 16777619u;
    constexpr uint32_t fnv_basis = 2166136261u;

    uint32_t hash = fnv_basis;

    const auto end = value.end();
    auto it = value.begin();

    while (it != end) {
        const auto lowercase = tolower(pgm_read_byte(it));
        hash = (hash ^ static_cast<uint32_t>(lowercase)) * fnv_prime;
        ++it;
    }

    return hash;
}
|
|
|
|
} // namespace parser
|
|
|
|
// Public entry point; forwards to the parser::parse_line()
CommandLine parse_line(StringView value) {
    auto line = parser::parse_line(value);
    return line;
}
|
|
|
|
} // namespace terminal
|
|
} // namespace espurna
|