// mh / espurna-mirror
// mirror of https://github.com/xoseperez/espurna


								/*


								Part of the TERMINAL MODULE


								Copyright (C) 2016-2019 by Xose Pérez <xose dot perez at gmail dot com>

								Copyright (C) 2020 by Maxim Prokhorov <prokhorov dot max at outlook dot com>


								*/


								#include <vector>

								#include <cctype>


								#include "terminal_parsing.h"


								namespace espurna {

								namespace terminal {

								namespace parser {


								// Textual name of a parser error code.
								// The result stays empty when `value` matches none of the known codes.
								String error(Error value) {
								    const char* name = nullptr;

								    switch (value) {
								    case Error::Ok:
								        name = PSTR("Ok");
								        break;
								    case Error::Uninitialized:
								        name = PSTR("Uninitialized");
								        break;
								    case Error::Busy:
								        name = PSTR("Busy");
								        break;
								    case Error::UnescapedText:
								        name = PSTR("UnescapedText");
								        break;
								    case Error::UnterminatedQuote:
								        name = PSTR("UnterminatedQuote");
								        break;
								    case Error::InvalidEscape:
								        name = PSTR("InvalidEscape");
								        break;
								    case Error::UnexpectedLineEnd:
								        name = PSTR("UnexpectedLineEnd");
								        break;
								    case Error::NoSpaceAfterQuote:
								        name = PSTR("NoSpaceAfterQuote");
								        break;
								    }

								    // single assignment point, only taken when the code was recognized
								    String out;
								    if (name) {
								        out = name;
								    }

								    return out;
								}


								namespace {


								// Original code is part of the SDSLib 2.0 -- A C dynamic strings library

								// - https://github.com/antirez/sds/blob/master/sds.c

								// - https://github.com/antirez/redis/blob/unstable/src/networking.c

								// Replaced with a stateful parser to avoid random look-ahead issues in the code,

								// and really make sure we **never** go out of bounds of the given view.

								// (e.g. when we want to parse only a part of a larger string)


								// Helper functions to handle \xHH codes that could encode

								// non-printable characters for commands or arguments

								// True for any of 0-9, a-f or A-F
								bool is_hex_digit(char c) {
								    const bool decimal = (c >= '0') && (c <= '9');
								    const bool lower = (c >= 'a') && (c <= 'f');
								    const bool upper = (c >= 'A') && (c <= 'F');

								    return decimal || lower || upper;
								}


								// Numeric value (0..15) of a single hexadecimal digit.
								// Anything that is not a hexadecimal digit maps to 0.
								char hex_digit_to_byte(char c) {
								    if ((c >= '0') && (c <= '9')) {
								        return c - '0';
								    }

								    if ((c >= 'a') && (c <= 'f')) {
								        return (c - 'a') + 10;
								    }

								    if ((c >= 'A') && (c <= 'F')) {
								        return (c - 'A') + 10;
								    }

								    return 0;
								}


								// Combine two hexadecimal digits into one byte; `lhs` is the high nibble, `rhs` the low one
								char hex_digit_to_value(char lhs, char rhs) {
								    const auto high = hex_digit_to_byte(lhs);
								    const auto low = hex_digit_to_byte(rhs);

								    return (high << 4) | low;
								}


								// printable ASCII character set

								// True when `c` lies between ' ' and '~' (both included)
								bool is_printable(char c) {
								    return (c >= ' ') && (c <= '~');
								}


								// allowed 'special' input characters

								// Translate the character following a backslash into the control character it stands for.
								// Characters without a special meaning are handed back unchanged.
								char unescape_char(char c) {
								    if (c == 'n') {
								        return '\n';
								    }

								    if (c == 'r') {
								        return '\r';
								    }

								    if (c == 't') {
								        return '\t';
								    }

								    if (c == 'b') {
								        return '\b';
								    }

								    if (c == 'a') {
								        return '\a';
								    }

								    return c;
								}


								struct Result {

								    Result() = default;


								    Result& operator=(Error error) {

								        _error = error;

								        _argv.clear();

								        return *this;

								    }


								    Result& operator=(Argv&& argv) {

								        _argv = std::move(argv);

								        _error = Error::Ok;

								        return *this;

								    }


								    explicit operator bool() const {

								        return _error == Error::Ok;

								    }


								    CommandLine commandLine() {

								        auto out = CommandLine{

								            .argv = std::move(_argv),

								            .error = _error };


								        _error = Error::Uninitialized;

								        return out;

								    }


								private:

								    Error _error { Error::Uninitialized };

								    Argv _argv;

								};


								// Stateful command line parser. The only data kept between calls is the
								// re-entry flag; all intermediate values live inside `operator()`.
								struct Parser {
								    Parser() = default;

								    // Parse the given view; defined out-of-class
								    Result operator()(StringView);

								private:
								    // only tracked within our `operator()(<LINE>)`
								    enum class State {
								        Done,
								        Initial,
								        Text,
								        CarriageReturn,
								        CarriageReturnAfterText,
								        SkipUntilNewLine,
								        EscapedText,
								        EscapedByteLhs,
								        EscapedByteRhs,
								        SingleQuote,
								        EscapedQuote,
								        DoubleQuote,
								        AfterQuote,
								    };

								    // disallow re-entry with a custom lock handler
								    //
								    // The guard owns the flag only when the flag was clear at construction time
								    // (`initialized()` reports that). An owning guard raises the flag right away
								    // and clears it again when destroyed; a non-owning guard never touches it.
								    struct Lock {
								        Lock() = delete;

								        Lock(const Lock&) = delete;
								        Lock& operator=(const Lock&) = delete;

								        // Ownership travels with the object, so that the flag is cleared
								        // exactly once and not already by the moved-from guard
								        Lock(Lock&& other) noexcept :
								            _initialized(other._initialized),
								            _handle(other._handle)
								        {
								            other._initialized = false;
								        }

								        Lock& operator=(Lock&&) = delete;

								        explicit Lock(bool& handle) :
								            _initialized(!handle),
								            _handle(handle)
								        {
								            // without this the flag is never raised and the guard protects nothing
								            lock();
								        }

								        ~Lock() {
								            unlock();
								        }

								        // true when this guard owns the flag
								        bool initialized() const {
								            return _initialized;
								        }

								        void lock() {
								            if (initialized()) {
								                _handle = true;
								            }
								        }

								        void unlock() {
								            if (initialized()) {
								                _handle = false;
								            }
								        }

								    private:
								        bool _initialized;
								        bool& _handle;
								    };

								    // intermediate storage for
								    // - ARGV resulting list
								    // - text buffer or intermediate span range
								    // - escaped character (since we don't look ahead when iterating)
								    struct Values {
								        Argv argv;
								        String chunk;
								        const char* span_begin { nullptr };
								        const char* span_end { nullptr };
								        char byte_lhs { 0 };
								    };

								    // Extend the pending span so that it ends right after `ptr`, starting a new
								    // span at `ptr` when there is none. Both span pointers are always either
								    // null or set together, so the end can be assigned unconditionally.
								    static void append_span(Values& values, const char* ptr) {
								        if (!values.span_begin) {
								            values.span_begin = ptr;
								        }

								        values.span_end = std::next(ptr);
								    }

								    // Copy the pending span (if any) to the end of the current chunk and forget it
								    static void push_span(Values& values) {
								        if (values.span_begin && values.span_end) {
								            StringView span(values.span_begin, values.span_end);
								            values.chunk.concat(span.c_str(), span.length());
								            values.span_begin = nullptr;
								            values.span_end = nullptr;
								        }
								    }

								    // Append a single character to the chunk; the pending span goes first to keep the order
								    static void append_chunk(Values& values, char c) {
								        push_span(values);
								        values.chunk.concat(&c, 1);
								    }

								    // Finish the current argument: flush the span, store the chunk and start an empty one
								    static void push_chunk(Values& values) {
								        push_span(values);
								        values.argv.push_back(values.chunk);
								        values.chunk = "";
								    }

								    // raised while `operator()` is running
								    bool _parsing { false };
								};


								// Split the first line found in `line` into a list of arguments.
								//
								// - arguments are separated by spaces or tabs
								// - the line has to be terminated by either '\n' or '\r\n'; anything after it is not looked at
								// - "double quoted" text understands backslash escapes: the ones known to `unescape_char`,
								//   `\xHH` for an arbitrary byte, and any other character is taken as-is
								// - 'single quoted' text only understands `\'`
								// - a closing quote must be followed by a space, a tab or the line end
								// - outside of escapes only printable characters are accepted
								//
								// On success the result carries the arguments. Otherwise it carries the error that stopped
								// the parser, or, when the input ran out first, an error derived from the state it stopped in.
								Result Parser::operator()(StringView line) {
								    Result result;
								    Values values;

								    State state { State::Initial };

								    // re-entry guard; when it could not be taken, give up right away
								    Lock lock(_parsing);
								    if (!lock.initialized()) {
								        result = Error::Busy;
								        goto out;
								    }

								    for (auto it = line.begin(); it != line.end(); ++it) {
								        switch (state) {
								        case State::Initial:
								            switch (*it) {
								            case ' ':
								            case '\t':
								                break;
								            case '\r':
								                state = State::CarriageReturn;
								                break;
								            case '\n':
								                state = State::Done;
								                break;
								            default:
								                // the current character already belongs to the argument
								                state = State::Text;
								                goto text;
								            }
								            break;

								        case State::Done:
								            goto out;

								        case State::Text:
								text:
								            switch (*it) {
								            case ' ':
								            case '\t':
								                push_chunk(values);
								                state = State::Initial;
								                break;
								            case '"':
								                // flush now, or the span would continue over the quote character
								                push_span(values);
								                state = State::DoubleQuote;
								                break;
								            case '\'':
								                // same as above
								                push_span(values);
								                state = State::SingleQuote;
								                break;
								            case '\r':
								                state = State::CarriageReturnAfterText;
								                break;
								            case '\n':
								                push_chunk(values);
								                state = State::Done;
								                break;
								            default:
								                if (is_printable(*it)) {
								                    append_span(values, it);
								                } else {
								                    result = Error::UnescapedText;
								                    goto out;
								                }
								                break;
								            }
								            break;

								        case State::CarriageReturn:
								            if ((*it) == '\n') {
								                state = State::Done;
								            } else {
								                result = Error::UnexpectedLineEnd;
								                goto out;
								            }
								            break;

								        case State::CarriageReturnAfterText:
								            if ((*it) == '\n') {
								                push_chunk(values);
								                state = State::Done;
								            } else {
								                result = Error::UnexpectedLineEnd;
								                goto out;
								            }
								            break;

								        case State::SkipUntilNewLine:
								            switch (*it) {
								            case '\r':
								                state = State::CarriageReturn;
								                break;
								            case '\n':
								                state = State::Initial;
								                break;
								            }
								            break;

								        case State::EscapedText: {
								            switch (*it) {
								            case '\r':
								            case '\n':
								                result = Error::UnexpectedLineEnd;
								                goto out;
								            case 'x':
								                state = State::EscapedByteLhs;
								                break;
								            default:
								                append_chunk(values, unescape_char(*it));
								                // escape is complete, continue with the quoted text
								                // (otherwise every following character is treated as escaped, too)
								                state = State::DoubleQuote;
								                break;
								            }
								            break;
								        }

								        case State::EscapedByteLhs:
								            if (is_hex_digit(*it)) {
								                values.byte_lhs = *it;
								                state = State::EscapedByteRhs;
								            } else {
								                result = Error::InvalidEscape;
								                goto out;
								            }
								            break;

								        case State::EscapedByteRhs:
								            if (is_hex_digit(*it)) {
								                append_chunk(values, hex_digit_to_value(values.byte_lhs, *it));
								                state = State::DoubleQuote;
								            } else {
								                result = Error::InvalidEscape;
								                goto out;
								            }
								            break;

								        case State::SingleQuote:
								            switch (*it) {
								            case '\r':
								            case '\n':
								                result = Error::UnterminatedQuote;
								                goto out;
								            case '\\':
								                state = State::EscapedQuote;
								                break;
								            case '\'':
								                state = State::AfterQuote;
								                break;
								            default:
								                if (is_printable(*it)) {
								                    append_span(values, it);
								                } else {
								                    result = Error::UnescapedText;
								                    goto out;
								                }
								                break;
								            }
								            break;

								        case State::EscapedQuote:
								            switch (*it) {
								            case '\'':
								                // goes through the helper so the pending span is flushed first;
								                // appending to the chunk directly puts the quote in front of the
								                // preceding text and keeps the backslash inside of the span
								                append_chunk(values, *it);
								                state = State::SingleQuote;
								                break;
								            default:
								                result = Error::InvalidEscape;
								                goto out;
								            }
								            break;

								        case State::AfterQuote:
								            switch (*it) {
								            case '\r':
								                state = State::CarriageReturnAfterText;
								                break;
								            case ' ':
								            case '\t':
								                push_chunk(values);
								                state = State::Initial;
								                break;
								            case '\n':
								                push_chunk(values);
								                state = State::Done;
								                break;
								            default:
								                result = Error::NoSpaceAfterQuote;
								                goto out;
								            }
								            break;

								        case State::DoubleQuote:
								            switch (*it) {
								            case '\r':
								            case '\n':
								                result = Error::UnterminatedQuote;
								                goto out;
								            case '"':
								                state = State::AfterQuote;
								                break;
								            case '\\':
								                state = State::EscapedText;
								                break;
								            default:
								                if (is_printable(*it)) {
								                    append_span(values, it);
								                } else {
								                    result = Error::UnescapedText;
								                    goto out;
								                }
								                break;
								            }
								            break;

								        }
								    }

								    // whenever line ends before we are done parsing, make sure
								    // result contains a valid error condition (same as in the switch above)
								    // Only reached when the loop ran out of input. Every `goto out` skips this
								    // part, so an error that was set explicitly is never replaced.
								    switch (state) {
								    case State::Done:
								        break;
								    case State::CarriageReturn:
								    case State::CarriageReturnAfterText:
								    case State::Text:
								    case State::Initial:
								    case State::SkipUntilNewLine:
								        result = Error::UnexpectedLineEnd;
								        break;
								    case State::EscapedByteLhs:
								    case State::EscapedByteRhs:
								    case State::EscapedText:
								    case State::EscapedQuote:
								        result = Error::InvalidEscape;
								        break;
								    case State::SingleQuote:
								    case State::DoubleQuote:
								        result = Error::UnterminatedQuote;
								        break;
								    case State::AfterQuote:
								        result = Error::NoSpaceAfterQuote;
								        break;
								    }

								out:
								    // complete line, hand over the collected arguments
								    if (state == State::Done) {
								        result = std::move(values.argv);
								    }

								    return result;
								}


								// Run the parser instance shared by all callers over `line`
								// and convert whatever it produced into a CommandLine
								CommandLine parse_line(StringView line) {
								    static Parser parser;

								    auto result = parser(line);
								    return result.commandLine();
								}


								} // namespace


								// Fowler–Noll–Vo hash function to hash command strings that treats input as lowercase

								// ref: https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function

								//

								// This is here in case `std::unordered_map` becomes viable

								// TODO: afaik, map implementation should handle collisions (however rare they are in our case)

								// if not, we can always roll static commands allocation and just match strings with strcmp_P


								// 32-bit FNV hash over `value`, where every byte goes through `tolower` before it is mixed in.
								// Bytes are fetched with `pgm_read_byte`. Each round XORs the byte into the hash first and
								// multiplies by the prime second, which is the order of the FNV-1a variant.
								uint32_t lowercase_fnv1_hash(StringView value) {
								    constexpr uint32_t fnv_basis = 2166136261u;
								    constexpr uint32_t fnv_prime = 16777619u;

								    uint32_t hash = fnv_basis;

								    auto it = value.begin();
								    while (it != value.end()) {
								        hash ^= static_cast<uint32_t>(tolower(pgm_read_byte(it)));
								        hash *= fnv_prime;
								        ++it;
								    }

								    return hash;
								}


								} // namespace parser


								// Entry point in the `terminal` namespace: forwards `value` to
								// `parser::parse_line` and returns its CommandLine as-is.
								CommandLine parse_line(StringView value) {

								    return parser::parse_line(value);

								}


								} // namespace terminal

								} // namespace espurna