/* Part of the TERMINAL MODULE Copyright (C) 2016-2019 by Xose Pérez Copyright (C) 2020 by Maxim Prokhorov */ #include #include #include "terminal_parsing.h" namespace terminal { namespace parsing { // c/p with minor modifications from redis / sds, so that we don't have to roll a custom parser // ref: // - https://github.com/antirez/sds/blob/master/sds.c // - https://github.com/antirez/redis/blob/unstable/src/networking.c // // Things are kept mostly the same, we are replacing Redis-specific things: // - sds structure -> String // - sds array -> std::vector // - we return always return custom structure, nullptr can no longer be used // to notify about the missing / unterminated / mismatching quotes // - hex_... function helpers types are changed // Original code is part of the SDSLib 2.0 -- A C dynamic strings library // * // * Copyright (c) 2006-2015, Salvatore Sanfilippo // * Copyright (c) 2015, Oran Agra // * Copyright (c) 2015, Redis Labs, Inc // * All rights reserved. // * // * Redistribution and use in source and binary forms, with or without // * modification, are permitted provided that the following conditions are met: // * // * * Redistributions of source code must retain the above copyright notice, // * this list of conditions and the following disclaimer. // * * Redistributions in binary form must reproduce the above copyright // * notice, this list of conditions and the following disclaimer in the // * documentation and/or other materials provided with the distribution. // * * Neither the name of Redis nor the names of its contributors may be used // * to endorse or promote products derived from this software without // * specific prior written permission. // * // * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE // * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // * POSSIBILITY OF SUCH DAMAGE. // Helper functions to handle \xHH codes static bool is_hex_digit(char c) { return (c >= '0' && c <= '9') \ ||(c >= 'a' && c <= 'f') \ ||(c >= 'A' && c <= 'F'); } static char hex_digit_to_int(char c) { switch (c) { case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; case 'a': case 'A': return 10; case 'b': case 'B': return 11; case 'c': case 'C': return 12; case 'd': case 'D': return 13; case 'e': case 'E': return 14; case 'f': case 'F': return 15; default: return 0; } } // Our port of `sdssplitargs` CommandLine parse_commandline(const char *line) { const char *p = line; CommandLine result {{}, 0}; result.argv.reserve(4); String current; while(1) { /* skip blanks */ while(*p && isspace(*p)) p++; if (*p) { /* get a token */ int inq=0; /* set to 1 if we are in "quotes" */ int insq=0; /* set to 1 if we are in 'single quotes' */ int done=0; while(!done) { if (inq) { if (*p == '\\' && *(p+1) == 'x' && is_hex_digit(*(p+2)) && is_hex_digit(*(p+3))) { // XXX: make sure that we append `char` or `char[]`, // even with -funsigned-char this can accidentally append itoa conversion unsigned char byte = (hex_digit_to_int(*(p+2))*16)+ hex_digit_to_int(*(p+3)); char buf[2] { static_cast(byte), 0x00 }; current += buf; p += 3; } else if (*p == '\\' && *(p+1)) { char c; p++; switch(*p) { case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'b': c = '\b'; break; case 'a': c = '\a'; break; default: c = *p; break; } current += c; } else if (*p == '"') { /* closing quote must be followed by a space or * nothing at all. */ if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ goto err; } else { char buf[2] {*p, '\0'}; current += buf; } } else if (insq) { if (*p == '\\' && *(p+1) == '\'') { p++; current += '\''; } else if (*p == '\'') { /* closing quote must be followed by a space or * nothing at all. */ if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */ goto err; } else { char buf[2] {*p, '\0'}; current += buf; } } else { switch(*p) { case ' ': case '\n': case '\r': case '\t': case '\0': done=1; break; case '"': inq=1; break; case '\'': insq=1; break; default: { char buf[2] {*p, '\0'}; current += buf; break; } } } if (*p) p++; } /* add the token to the vector */ result.argv.emplace_back(std::move(current)); ++result.argc; } else { /* Even on empty input string return something not NULL. */ return result; } } err: result.argc = 0; result.argv.clear(); return result; } // Fowler–Noll–Vo hash function to hash command strings that treats input as lowercase // ref: https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function // // This is here in case `std::unordered_map` becomes viable // TODO: afaik, map implementation should handle collisions (however rare they are in our case) // if not, we can always roll static commands allocation and just match strings with strcmp_P uint32_t lowercase_fnv1_hash(const char* ptr) { constexpr uint32_t fnv_prime = 16777619u; constexpr uint32_t fnv_basis = 2166136261u; const auto length = strlen_P(ptr); uint32_t hash = fnv_basis; for (size_t idx = 0; idx < length; ++idx) { hash = hash ^ static_cast(tolower(pgm_read_byte(&ptr[idx]))); hash = hash * fnv_prime; } return hash; } uint32_t lowercase_fnv1_hash(const __FlashStringHelper* ptr) { return lowercase_fnv1_hash(reinterpret_cast(ptr)); } } // namespace parsing } // namespace terminal