Source
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* lib/ts_fsm.c A naive finite state machine text search approach
*
* Authors: Thomas Graf <tgraf@suug.ch>
*
* ==========================================================================
*
* A finite state machine consists of n states (struct ts_fsm_token)
* representing the pattern as a finite automaton. The data is read
* sequentially on an octet basis. Every state token specifies the number
* of recurrences and the type of value accepted which can be either a
* specific character or ctype based set of characters. The available
* type of recurrences include 1, (0|1), [0 n], and [1 n].
*
* The algorithm differs between strict/non-strict mode specifying
* whether the pattern has to start at the first octet. Strict mode
* is enabled by default and can be disabled by inserting
* TS_FSM_HEAD_IGNORE as the first token in the chain.
*
* The runtime performance of the algorithm should be around O(n),
* however while in strict mode the average runtime can be better.
*/
struct ts_fsm
{
unsigned int ntokens;
struct ts_fsm_token tokens[0];
};
/* other values derived from ctype.h */
/* ascii */
/* wildcard */
/* Map to _ctype flags and some magic numbers */
static const u16 token_map[TS_FSM_TYPE_MAX+1] = {
[TS_FSM_SPECIFIC] = 0,
[TS_FSM_WILDCARD] = _W,
[TS_FSM_CNTRL] = _C,
[TS_FSM_LOWER] = _L,
[TS_FSM_UPPER] = _U,
[TS_FSM_PUNCT] = _P,
[TS_FSM_SPACE] = _S,
[TS_FSM_DIGIT] = _D,
[TS_FSM_XDIGIT] = _D | _X,
[TS_FSM_ALPHA] = _U | _L,
[TS_FSM_ALNUM] = _U | _L | _D,
[TS_FSM_PRINT] = _P | _U | _L | _D | _SP,
[TS_FSM_GRAPH] = _P | _U | _L | _D,
[TS_FSM_ASCII] = _A,
};
static const u16 token_lookup_tbl[256] = {
_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 0- 3 */
_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 4- 7 */
_W|_A|_C, _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C|_S, /* 8- 11 */
_W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C, _W|_A|_C, /* 12- 15 */
_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 16- 19 */
_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 20- 23 */
_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 24- 27 */
_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 28- 31 */
_W|_A|_S|_SP, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 32- 35 */
_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 36- 39 */
_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 40- 43 */
_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 44- 47 */
_W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 48- 51 */
_W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 52- 55 */
_W|_A|_D, _W|_A|_D, _W|_A|_P, _W|_A|_P, /* 56- 59 */
_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 60- 63 */
_W|_A|_P, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, /* 64- 67 */
_W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U, /* 68- 71 */
_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 72- 75 */
_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 76- 79 */
_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 80- 83 */
_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 84- 87 */
_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_P, /* 88- 91 */
_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 92- 95 */
_W|_A|_P, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, /* 96- 99 */
_W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L, /* 100-103 */
_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 104-107 */
_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 108-111 */
_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 112-115 */
_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 116-119 */
_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_P, /* 120-123 */
_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_C, /* 124-127 */