[Forensics-changes] [yara] 311/415: Implement unbound jumps in hex strings by using chaining multiple strings
Hilko Bengen
bengen at moszumanska.debian.org
Thu Apr 3 05:43:18 UTC 2014
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to branch debian
in repository yara.
commit ddf3dda4de4ecc3d4a7ad04ce005aff80a36fa73
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Tue Dec 17 15:55:03 2013 +0100
Implement unbound jumps in hex strings by using chaining multiple strings
---
libyara/hex_grammar.c | 217 ++++++++++++++++--------------
libyara/hex_grammar.h | 2 +-
libyara/hex_grammar.y | 56 ++++----
libyara/hex_lexer.c | 113 ++++++++--------
libyara/hex_lexer.h | 1 +
libyara/hex_lexer.l | 12 +-
libyara/lexer.c | 2 +-
libyara/lexer.l | 2 +-
libyara/parser.c | 359 +++++++++++++++++++++++++++++++-------------------
libyara/re.c | 131 ++++++++++++++++--
libyara/re.h | 19 ++-
libyara/re_grammar.c | 127 +++++++-----------
libyara/re_grammar.h | 2 +-
libyara/re_grammar.y | 39 ------
libyara/re_lexer.c | 8 --
libyara/re_lexer.l | 8 --
libyara/rules.c | 92 ++++++++++---
libyara/yara.h | 84 ++++++------
18 files changed, 741 insertions(+), 533 deletions(-)
diff --git a/libyara/hex_grammar.c b/libyara/hex_grammar.c
index 8d27cf3..d57ab40 100644
--- a/libyara/hex_grammar.c
+++ b/libyara/hex_grammar.c
@@ -92,6 +92,7 @@
#include <stdint.h>
+#include <limits.h>
#include "hex_lexer.h"
#include "mem.h"
@@ -108,9 +109,6 @@
#define YYDEBUG 0
-#define mark_as_not_literal() \
- ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
-
#define mark_as_not_fast_hex_regexp() \
((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_FAST_HEX_REGEXP
@@ -154,13 +152,13 @@ yydebug = 1;
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
-#line 73 "hex_grammar.y"
+#line 71 "hex_grammar.y"
{
int integer;
RE_NODE *re_node;
}
/* Line 193 of yacc.c. */
-#line 164 "hex_grammar.c"
+#line 162 "hex_grammar.c"
YYSTYPE;
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1
@@ -173,7 +171,7 @@ typedef union YYSTYPE
/* Line 216 of yacc.c. */
-#line 177 "hex_grammar.c"
+#line 175 "hex_grammar.c"
#ifdef short
# undef short
@@ -391,13 +389,13 @@ union yyalloc
#define YYLAST 23
/* YYNTOKENS -- Number of terminals. */
-#define YYNTOKENS 14
+#define YYNTOKENS 15
/* YYNNTS -- Number of nonterminals. */
-#define YYNNTS 7
+#define YYNNTS 8
/* YYNRULES -- Number of rules. */
-#define YYNRULES 13
+#define YYNRULES 15
/* YYNRULES -- Number of states. */
-#define YYNSTATES 23
+#define YYNSTATES 26
/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
#define YYUNDEFTOK 2
@@ -413,7 +411,7 @@ static const yytype_uint8 yytranslate[] =
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 8, 9, 2, 2, 2, 12, 2, 2, 2, 2,
+ 8, 9, 2, 2, 2, 12, 13, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -421,7 +419,7 @@ static const yytype_uint8 yytranslate[] =
2, 10, 2, 11, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 6, 13, 7, 2, 2, 2, 2,
+ 2, 2, 2, 6, 14, 7, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -443,24 +441,25 @@ static const yytype_uint8 yytranslate[] =
YYRHS. */
static const yytype_uint8 yyprhs[] =
{
- 0, 0, 3, 7, 9, 12, 14, 18, 22, 24,
- 28, 30, 34, 36
+ 0, 0, 3, 7, 9, 12, 14, 15, 20, 24,
+ 26, 30, 33, 35, 39, 41
};
/* YYRHS -- A `-1'-separated list of the rules' RHS. */
static const yytype_int8 yyrhs[] =
{
- 15, 0, -1, 6, 16, 7, -1, 17, -1, 16,
- 17, -1, 20, -1, 8, 19, 9, -1, 10, 18,
- 11, -1, 5, -1, 5, 12, 5, -1, 16, -1,
- 19, 13, 16, -1, 3, -1, 4, -1
+ 16, 0, -1, 6, 17, 7, -1, 18, -1, 17,
+ 18, -1, 22, -1, -1, 8, 19, 21, 9, -1,
+ 10, 20, 11, -1, 5, -1, 5, 12, 5, -1,
+ 13, 13, -1, 17, -1, 21, 14, 17, -1, 3,
+ -1, 4, -1
};
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
static const yytype_uint8 yyrline[] =
{
- 0, 92, 92, 100, 104, 115, 119, 123, 132, 147,
- 173, 177, 190, 214
+ 0, 90, 90, 98, 102, 113, 118, 117, 126, 134,
+ 149, 172, 198, 202, 214, 222
};
#endif
@@ -470,8 +469,8 @@ static const yytype_uint8 yyrline[] =
static const char *const yytname[] =
{
"$end", "error", "$undefined", "_BYTE_", "_MASKED_BYTE_", "_NUMBER_",
- "'{'", "'}'", "'('", "')'", "'['", "']'", "'-'", "'|'", "$accept",
- "hex_string", "tokens", "token", "range", "alternatives", "byte", 0
+ "'{'", "'}'", "'('", "')'", "'['", "']'", "'-'", "'.'", "'|'", "$accept",
+ "hex_string", "tokens", "token", "@1", "range", "alternatives", "byte", 0
};
#endif
@@ -481,22 +480,22 @@ static const char *const yytname[] =
static const yytype_uint16 yytoknum[] =
{
0, 256, 257, 258, 259, 260, 123, 125, 40, 41,
- 91, 93, 45, 124
+ 91, 93, 45, 46, 124
};
# endif
/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
static const yytype_uint8 yyr1[] =
{
- 0, 14, 15, 16, 16, 17, 17, 17, 18, 18,
- 19, 19, 20, 20
+ 0, 15, 16, 17, 17, 18, 19, 18, 18, 20,
+ 20, 20, 21, 21, 22, 22
};
/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
static const yytype_uint8 yyr2[] =
{
- 0, 2, 3, 1, 2, 1, 3, 3, 1, 3,
- 1, 3, 1, 1
+ 0, 2, 3, 1, 2, 1, 0, 4, 3, 1,
+ 3, 2, 1, 3, 1, 1
};
/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
@@ -504,31 +503,31 @@ static const yytype_uint8 yyr2[] =
means the default is an error. */
static const yytype_uint8 yydefact[] =
{
- 0, 0, 0, 12, 13, 0, 0, 0, 3, 5,
- 1, 10, 0, 8, 0, 2, 4, 6, 0, 0,
- 7, 11, 9
+ 0, 0, 0, 14, 15, 6, 0, 0, 3, 5,
+ 1, 0, 9, 0, 0, 2, 4, 12, 0, 0,
+ 11, 8, 7, 0, 10, 13
};
/* YYDEFGOTO[NTERM-NUM]. */
static const yytype_int8 yydefgoto[] =
{
- -1, 2, 7, 8, 14, 12, 9
+ -1, 2, 7, 8, 11, 14, 18, 9
};
/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
STATE-NUM. */
-#define YYPACT_NINF -8
+#define YYPACT_NINF -11
static const yytype_int8 yypact[] =
{
- -5, 13, 8, -8, -8, 13, 6, 2, -8, -8,
- -8, 13, -6, 1, 7, -8, -8, -8, 13, 14,
- -8, 13, -8
+ -4, 4, 5, -11, -11, -11, -2, 12, -11, -11,
+ -11, 4, -6, 8, 6, -11, -11, 4, -5, 18,
+ -11, -11, -11, 4, -11, 4
};
/* YYPGOTO[NTERM-NUM]. */
static const yytype_int8 yypgoto[] =
{
- -8, -8, -3, -7, -8, -8, -8
+ -11, -11, -10, -7, -11, -11, -11, -11
};
/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
@@ -538,25 +537,25 @@ static const yytype_int8 yypgoto[] =
#define YYTABLE_NINF -1
static const yytype_uint8 yytable[] =
{
- 16, 1, 11, 17, 16, 3, 4, 18, 10, 15,
- 5, 13, 6, 19, 16, 21, 3, 4, 20, 22,
- 0, 5, 0, 6
+ 16, 17, 1, 12, 22, 10, 19, 3, 4, 23,
+ 16, 13, 5, 25, 6, 3, 4, 21, 16, 15,
+ 5, 20, 6, 24
};
-static const yytype_int8 yycheck[] =
+static const yytype_uint8 yycheck[] =
{
- 7, 6, 5, 9, 11, 3, 4, 13, 0, 7,
- 8, 5, 10, 12, 21, 18, 3, 4, 11, 5,
- -1, 8, -1, 10
+ 7, 11, 6, 5, 9, 0, 12, 3, 4, 14,
+ 17, 13, 8, 23, 10, 3, 4, 11, 25, 7,
+ 8, 13, 10, 5
};
/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
symbol of state STATE-NUM. */
static const yytype_uint8 yystos[] =
{
- 0, 6, 15, 3, 4, 8, 10, 16, 17, 20,
- 0, 16, 19, 5, 18, 7, 17, 9, 13, 12,
- 11, 16, 5
+ 0, 6, 16, 3, 4, 8, 10, 17, 18, 22,
+ 0, 19, 5, 13, 20, 7, 18, 17, 21, 12,
+ 13, 11, 9, 14, 5, 17
};
#define yyerrok (yyerrstatus = 0)
@@ -1076,30 +1075,30 @@ yydestruct (yymsg, yytype, yyvaluep, yyscanner, lex_env)
switch (yytype)
{
- case 16: /* "tokens" */
-#line 84 "hex_grammar.y"
+ case 17: /* "tokens" */
+#line 82 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1083 "hex_grammar.c"
+#line 1082 "hex_grammar.c"
break;
- case 17: /* "token" */
-#line 85 "hex_grammar.y"
+ case 18: /* "token" */
+#line 83 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1088 "hex_grammar.c"
+#line 1087 "hex_grammar.c"
break;
- case 18: /* "range" */
-#line 88 "hex_grammar.y"
+ case 20: /* "range" */
+#line 86 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1093 "hex_grammar.c"
+#line 1092 "hex_grammar.c"
break;
- case 19: /* "alternatives" */
-#line 87 "hex_grammar.y"
+ case 21: /* "alternatives" */
+#line 85 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1098 "hex_grammar.c"
+#line 1097 "hex_grammar.c"
break;
- case 20: /* "byte" */
-#line 86 "hex_grammar.y"
+ case 22: /* "byte" */
+#line 84 "hex_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1103 "hex_grammar.c"
+#line 1102 "hex_grammar.c"
break;
default:
@@ -1409,7 +1408,7 @@ yyreduce:
switch (yyn)
{
case 2:
-#line 93 "hex_grammar.y"
+#line 91 "hex_grammar.y"
{
RE* re = yyget_extra(yyscanner);
re->root_node = (yyvsp[(2) - (3)].re_node);
@@ -1417,14 +1416,14 @@ yyreduce:
break;
case 3:
-#line 101 "hex_grammar.y"
+#line 99 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 4:
-#line 105 "hex_grammar.y"
+#line 103 "hex_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
@@ -1435,30 +1434,37 @@ yyreduce:
break;
case 5:
-#line 116 "hex_grammar.y"
+#line 114 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 6:
-#line 120 "hex_grammar.y"
+#line 118 "hex_grammar.y"
{
- (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
+ lex_env->inside_or++;
}
break;
case 7:
-#line 124 "hex_grammar.y"
+#line 122 "hex_grammar.y"
+ {
+ (yyval.re_node) = (yyvsp[(3) - (4)].re_node);
+ lex_env->inside_or--;
+ }
+ break;
+
+ case 8:
+#line 127 "hex_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = (yyvsp[(2) - (3)].re_node);
(yyval.re_node)->greedy = FALSE;
}
break;
- case 8:
-#line 133 "hex_grammar.y"
+ case 9:
+#line 135 "hex_grammar.y"
{
RE_NODE* re_any;
@@ -1475,8 +1481,8 @@ yyreduce:
}
break;
- case 9:
-#line 148 "hex_grammar.y"
+ case 10:
+#line 150 "hex_grammar.y"
{
RE_NODE* re_any;
@@ -1501,17 +1507,42 @@ yyreduce:
}
break;
- case 10:
-#line 174 "hex_grammar.y"
+ case 11:
+#line 173 "hex_grammar.y"
+ {
+ RE_NODE* re_any;
+
+ if (lex_env->inside_or)
+ {
+ RE* re = yyget_extra(yyscanner);
+ re->error_code = ERROR_INVALID_HEX_STRING;
+ re->error_message = yr_strdup("[..] not allowed inside OR (|)");
+ YYABORT;
+ }
+
+ re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+
+ ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
+
+ (yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, re_any, NULL);
+
+ ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
+
+ (yyval.re_node)->start = 0;
+ (yyval.re_node)->end = INT_MAX;
+ }
+ break;
+
+ case 12:
+#line 199 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
- case 11:
-#line 178 "hex_grammar.y"
+ case 13:
+#line 203 "hex_grammar.y"
{
- mark_as_not_literal();
mark_as_not_fast_hex_regexp();
(yyval.re_node) = yr_re_node_create(RE_NODE_ALT, (yyvsp[(1) - (3)].re_node), (yyvsp[(3) - (3)].re_node));
@@ -1522,40 +1553,22 @@ yyreduce:
}
break;
- case 12:
-#line 191 "hex_grammar.y"
+ case 14:
+#line 215 "hex_grammar.y"
{
- RE* re = yyget_extra(yyscanner);
-
(yyval.re_node) = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
(yyval.re_node)->value = (yyvsp[(1) - (1)].integer);
-
- if (re->literal_string_len == re->literal_string_max)
- {
- re->literal_string_max *= 2;
-
- re->literal_string = yr_realloc(
- re->literal_string,
- re->literal_string_max);
-
- ERROR_IF(re->literal_string == NULL, ERROR_INSUFICIENT_MEMORY);
- }
-
- re->literal_string[re->literal_string_len] = (yyvsp[(1) - (1)].integer);
- re->literal_string_len++;
}
break;
- case 13:
-#line 215 "hex_grammar.y"
+ case 15:
+#line 223 "hex_grammar.y"
{
uint8_t mask = (yyvsp[(1) - (1)].integer) >> 8;
- mark_as_not_literal();
-
if (mask == 0x00)
{
(yyval.re_node) = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
@@ -1576,7 +1589,7 @@ yyreduce:
/* Line 1267 of yacc.c. */
-#line 1580 "hex_grammar.c"
+#line 1593 "hex_grammar.c"
default: break;
}
YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1790,7 +1803,7 @@ yyreturn:
}
-#line 238 "hex_grammar.y"
+#line 244 "hex_grammar.y"
diff --git a/libyara/hex_grammar.h b/libyara/hex_grammar.h
index 5d1606e..bc213c9 100644
--- a/libyara/hex_grammar.h
+++ b/libyara/hex_grammar.h
@@ -54,7 +54,7 @@
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
-#line 73 "hex_grammar.y"
+#line 71 "hex_grammar.y"
{
int integer;
RE_NODE *re_node;
diff --git a/libyara/hex_grammar.y b/libyara/hex_grammar.y
index 3061104..3050e04 100644
--- a/libyara/hex_grammar.y
+++ b/libyara/hex_grammar.y
@@ -17,6 +17,7 @@ limitations under the License.
%{
#include <stdint.h>
+#include <limits.h>
#include "hex_lexer.h"
#include "mem.h"
@@ -33,9 +34,6 @@ limitations under the License.
#define YYDEBUG 0
-#define mark_as_not_literal() \
- ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
-
#define mark_as_not_fast_hex_regexp() \
((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_FAST_HEX_REGEXP
@@ -116,13 +114,17 @@ token : byte
{
$$ = $1;
}
- | '(' alternatives ')'
+ | '('
{
- $$ = $2;
+ lex_env->inside_or++;
+ }
+ alternatives ')'
+ {
+ $$ = $3;
+ lex_env->inside_or--;
}
| '[' range ']'
{
- mark_as_not_literal();
$$ = $2;
$$->greedy = FALSE;
}
@@ -167,6 +169,29 @@ range : _NUMBER_
$$->start = $1;
$$->end = $3;
}
+ | '.' '.'
+ {
+ RE_NODE* re_any;
+
+ if (lex_env->inside_or)
+ {
+ RE* re = yyget_extra(yyscanner);
+ re->error_code = ERROR_INVALID_HEX_STRING;
+ re->error_message = yr_strdup("[..] not allowed inside OR (|)");
+ YYABORT;
+ }
+
+ re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+
+ ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
+
+ $$ = yr_re_node_create(RE_NODE_RANGE, re_any, NULL);
+
+ ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
+
+ $$->start = 0;
+ $$->end = INT_MAX;
+ }
;
@@ -176,7 +201,6 @@ alternatives : tokens
}
| alternatives '|' tokens
{
- mark_as_not_literal();
mark_as_not_fast_hex_regexp();
$$ = yr_re_node_create(RE_NODE_ALT, $1, $3);
@@ -189,34 +213,16 @@ alternatives : tokens
byte : _BYTE_
{
- RE* re = yyget_extra(yyscanner);
-
$$ = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
$$->value = $1;
-
- if (re->literal_string_len == re->literal_string_max)
- {
- re->literal_string_max *= 2;
-
- re->literal_string = yr_realloc(
- re->literal_string,
- re->literal_string_max);
-
- ERROR_IF(re->literal_string == NULL, ERROR_INSUFICIENT_MEMORY);
- }
-
- re->literal_string[re->literal_string_len] = $1;
- re->literal_string_len++;
}
| _MASKED_BYTE_
{
uint8_t mask = $1 >> 8;
- mark_as_not_literal();
-
if (mask == 0x00)
{
$$ = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
diff --git a/libyara/hex_lexer.c b/libyara/hex_lexer.c
index 25c6c36..d462d99 100644
--- a/libyara/hex_lexer.c
+++ b/libyara/hex_lexer.c
@@ -362,8 +362,8 @@ static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
*yy_cp = '\0'; \
yyg->yy_c_buf_p = yy_cp;
-#define YY_NUM_RULES 11
-#define YY_END_OF_BUFFER 12
+#define YY_NUM_RULES 12
+#define YY_END_OF_BUFFER 13
/* This struct is not used in this scanner,
but its presence is necessary. */
struct yy_trans_info
@@ -371,11 +371,11 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
-static yyconst flex_int16_t yy_accept[22] =
+static yyconst flex_int16_t yy_accept[23] =
{ 0,
- 0, 0, 0, 0, 12, 10, 9, 9, 10, 10,
- 5, 11, 6, 7, 8, 1, 2, 3, 4, 7,
- 0
+ 0, 0, 0, 0, 13, 11, 10, 10, 11, 11,
+ 5, 12, 6, 7, 8, 9, 1, 2, 3, 4,
+ 8, 0
} ;
static yyconst flex_int32_t yy_ec[256] =
@@ -384,14 +384,14 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 4, 1, 1, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 1, 1, 1,
- 1, 1, 6, 1, 7, 7, 7, 7, 7, 7,
+ 1, 1, 1, 1, 4, 5, 1, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 1, 1, 1,
+ 1, 1, 7, 1, 8, 8, 8, 8, 8, 8,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 8, 1, 9, 1, 1, 1, 7, 7, 7, 7,
+ 9, 1, 10, 1, 1, 1, 8, 8, 8, 8,
- 7, 7, 1, 1, 1, 1, 1, 1, 1, 1,
+ 8, 8, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -410,45 +410,45 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1
} ;
-static yyconst flex_int32_t yy_meta[10] =
+static yyconst flex_int32_t yy_meta[11] =
{ 0,
- 1, 1, 1, 1, 2, 2, 2, 1, 1
+ 1, 1, 1, 1, 1, 2, 2, 2, 1, 1
} ;
-static yyconst flex_int16_t yy_base[25] =
+static yyconst flex_int16_t yy_base[26] =
{ 0,
- 0, 0, 6, 12, 24, 25, 25, 25, 14, 13,
- 25, 25, 25, 13, 25, 25, 25, 25, 25, 9,
- 25, 21, 11, 10
+ 0, 0, 7, 14, 27, 28, 28, 28, 16, 15,
+ 28, 28, 28, 28, 15, 28, 28, 28, 28, 28,
+ 10, 28, 24, 13, 12
} ;
-static yyconst flex_int16_t yy_def[25] =
+static yyconst flex_int16_t yy_def[26] =
{ 0,
- 21, 1, 22, 22, 21, 21, 21, 21, 23, 24,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 0, 21, 21, 21
+ 22, 1, 23, 23, 22, 22, 22, 22, 24, 25,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 0, 22, 22, 22
} ;
-static yyconst flex_int16_t yy_nxt[35] =
+static yyconst flex_int16_t yy_nxt[39] =
{ 0,
- 6, 7, 8, 6, 9, 10, 9, 11, 6, 13,
- 14, 18, 16, 20, 15, 13, 14, 20, 19, 17,
- 15, 12, 12, 21, 5, 21, 21, 21, 21, 21,
- 21, 21, 21, 21
+ 6, 7, 8, 6, 6, 9, 10, 9, 11, 6,
+ 13, 14, 15, 19, 17, 21, 16, 13, 14, 15,
+ 21, 20, 18, 16, 12, 12, 22, 5, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22
} ;
-static yyconst flex_int16_t yy_chk[35] =
+static yyconst flex_int16_t yy_chk[39] =
{ 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 3,
- 3, 24, 23, 20, 3, 4, 4, 14, 10, 9,
- 4, 22, 22, 5, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 3, 3, 3, 25, 24, 21, 3, 4, 4, 4,
+ 15, 10, 9, 4, 23, 23, 5, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22
} ;
/* Table of booleans, true if rule could match eol. */
-static yyconst flex_int32_t yy_rule_can_match_eol[12] =
+static yyconst flex_int32_t yy_rule_can_match_eol[13] =
{ 0,
-0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, };
+0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, };
/* The intent behind this definition is that it'll catch
* any uses of REJECT which flex missed.
@@ -796,13 +796,13 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 22 )
+ if ( yy_current_state >= 23 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
++yy_cp;
}
- while ( yy_base[yy_current_state] != 25 );
+ while ( yy_base[yy_current_state] != 28 );
yy_find_action:
yy_act = yy_accept[yy_current_state];
@@ -896,6 +896,13 @@ case 7:
YY_RULE_SETUP
#line 107 "hex_lexer.l"
{
+ return yytext[0];
+}
+ YY_BREAK
+case 8:
+YY_RULE_SETUP
+#line 111 "hex_lexer.l"
+{
yylval->integer = atoi(yytext);
@@ -908,24 +915,24 @@ YY_RULE_SETUP
return _NUMBER_;
}
YY_BREAK
-case 8:
+case 9:
YY_RULE_SETUP
-#line 120 "hex_lexer.l"
+#line 124 "hex_lexer.l"
{
BEGIN(INITIAL);
return yytext[0];
}
YY_BREAK
-case 9:
-/* rule 9 can match eol */
+case 10:
+/* rule 10 can match eol */
YY_RULE_SETUP
-#line 127 "hex_lexer.l"
+#line 131 "hex_lexer.l"
// skip whitespace
YY_BREAK
-case 10:
+case 11:
YY_RULE_SETUP
-#line 130 "hex_lexer.l"
+#line 134 "hex_lexer.l"
{
if (yytext[0] >= 32 && yytext[0] < 127)
@@ -939,12 +946,12 @@ YY_RULE_SETUP
}
}
YY_BREAK
-case 11:
+case 12:
YY_RULE_SETUP
-#line 143 "hex_lexer.l"
+#line 147 "hex_lexer.l"
ECHO;
YY_BREAK
-#line 948 "hex_lexer.c"
+#line 955 "hex_lexer.c"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(range):
yyterminate();
@@ -1239,7 +1246,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 22 )
+ if ( yy_current_state >= 23 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -1268,11 +1275,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 22 )
+ if ( yy_current_state >= 23 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 21);
+ yy_is_jam = (yy_current_state == 22);
return yy_is_jam ? 0 : yy_current_state;
}
@@ -2080,7 +2087,7 @@ void hex_yyfree (void * ptr , yyscan_t yyscanner)
#define YYTABLES_NAME "yytables"
-#line 143 "hex_lexer.l"
+#line 147 "hex_lexer.l"
@@ -2127,21 +2134,17 @@ int yr_parse_hex_string(
LEX_ENVIRONMENT lex_env;
lex_env.last_error_message = NULL;
+ lex_env.inside_or = 0;
FAIL_ON_ERROR(yr_re_create(re));
- // The RE_FLAGS_LITERAL_STRING flag indicates that the regular expression
- // is just a literal string and it can be matched by doing a simple string
- // comparison, without executing any regular expression code.
- //
// The RE_FLAGS_FAST_HEX_REGEXP flag indicates a regular expression derived
// from a hex string that can be matched by faster algorithm. These regular
// expressions come from hex strings not contaning alternatives
// (like in 01 02 | 03 04).
//
- // These flags are unset later during parsing if necessary.
+ // This flag is unset later during parsing if necessary.
- (*re)->flags |= RE_FLAGS_LITERAL_STRING;
(*re)->flags |= RE_FLAGS_FAST_HEX_REGEXP;
#ifdef WIN32
diff --git a/libyara/hex_lexer.h b/libyara/hex_lexer.h
index eaeda59..24fd60a 100644
--- a/libyara/hex_lexer.h
+++ b/libyara/hex_lexer.h
@@ -39,6 +39,7 @@ typedef void* yyscan_t;
typedef struct _LEX_ENVIRONMENT
{
+ int inside_or;
const char* last_error_message;
} LEX_ENVIRONMENT;
diff --git a/libyara/hex_lexer.l b/libyara/hex_lexer.l
index e8247b8..c9e1df0 100644
--- a/libyara/hex_lexer.l
+++ b/libyara/hex_lexer.l
@@ -104,6 +104,10 @@ hexdigit [a-fA-F0-9]
return yytext[0];
}
+<range>\. {
+ return yytext[0];
+}
+
<range>{digit}+ {
yylval->integer = atoi(yytext);
@@ -185,21 +189,17 @@ int yr_parse_hex_string(
LEX_ENVIRONMENT lex_env;
lex_env.last_error_message = NULL;
+ lex_env.inside_or = 0;
FAIL_ON_ERROR(yr_re_create(re));
- // The RE_FLAGS_LITERAL_STRING flag indicates that the regular expression
- // is just a literal string and it can be matched by doing a simple string
- // comparison, without executing any regular expression code.
- //
// The RE_FLAGS_FAST_HEX_REGEXP flag indicates a regular expression derived
// from a hex string that can be matched by faster algorithm. These regular
// expressions come from hex strings not contaning alternatives
// (like in 01 02 | 03 04).
//
- // These flags are unset later during parsing if necessary.
+ // This flag is unset later during parsing if necessary.
- (*re)->flags |= RE_FLAGS_LITERAL_STRING;
(*re)->flags |= RE_FLAGS_FAST_HEX_REGEXP;
#ifdef WIN32
diff --git a/libyara/lexer.c b/libyara/lexer.c
index 919735a..2776d94 100644
--- a/libyara/lexer.c
+++ b/libyara/lexer.c
@@ -406,7 +406,7 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 4, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 5, 6, 7, 8, 1, 1, 1, 9,
- 9, 10, 1, 1, 9, 1, 11, 12, 13, 14,
+ 9, 10, 1, 1, 9, 9, 11, 12, 13, 14,
15, 16, 16, 17, 16, 18, 16, 1, 1, 19,
20, 21, 9, 22, 23, 24, 23, 23, 23, 23,
25, 25, 25, 25, 26, 25, 27, 25, 25, 25,
diff --git a/libyara/lexer.l b/libyara/lexer.l
index 15ef042..66d1c79 100644
--- a/libyara/lexer.l
+++ b/libyara/lexer.l
@@ -477,7 +477,7 @@ $({letter}|{digit}|_)* {
}
-\{({hexdigit}|[ \-|\?\[\]\(\)\n\t])+\} {
+\{({hexdigit}|[ \-|\?\[\]\(\)\.\n\t])+\} {
int len = strlen(yytext);
SIZED_STRING* s = (SIZED_STRING*) yr_malloc(len + sizeof(SIZED_STRING));
diff --git a/libyara/parser.c b/libyara/parser.c
index a621604..6d04886 100644
--- a/libyara/parser.c
+++ b/libyara/parser.c
@@ -198,47 +198,192 @@ YR_EXTERNAL_VARIABLE* yr_parser_lookup_external_variable(
}
-YR_STRING* yr_parser_reduce_string_declaration(
- yyscan_t yyscanner,
- int32_t flags,
+int _yr_parser_write_string(
const char* identifier,
- SIZED_STRING* str)
+ int flags,
+ YR_COMPILER* compiler,
+ SIZED_STRING* str,
+ RE* re,
+ YR_STRING** string,
+ int* min_atom_length)
{
- int min_atom_length;
- char* file_name;
- char message[512];
-
- YR_STRING* string;
+ SIZED_STRING* literal_string;
YR_AC_MATCH* new_match;
+
YR_ATOM_LIST_ITEM* atom;
YR_ATOM_LIST_ITEM* atom_list = NULL;
- RE* re = NULL;
- uint8_t* literal_string = NULL;
-
- int literal_string_len = 0;
+ int result;
int max_string_len;
+ int free_literal = FALSE;
- YR_COMPILER* compiler = yyget_extra(yyscanner);
+ *string = NULL;
- compiler->last_result = yr_arena_allocate_struct(
+ result = yr_arena_allocate_struct(
compiler->strings_arena,
sizeof(YR_STRING),
- (void**) &string,
+ (void**) string,
offsetof(YR_STRING, identifier),
offsetof(YR_STRING, string),
+ offsetof(YR_STRING, chained_to),
EOL);
- if (compiler->last_result != ERROR_SUCCESS)
- return NULL;
+ if (result != ERROR_SUCCESS)
+ return result;
- compiler->last_result = yr_arena_write_string(
+ result = yr_arena_write_string(
compiler->sz_arena,
identifier,
- &string->identifier);
+ &(*string)->identifier);
- if (compiler->last_result != ERROR_SUCCESS)
- return NULL;
+ if (result != ERROR_SUCCESS)
+ return result;
+
+ if (flags & STRING_GFLAGS_HEXADECIMAL ||
+ flags & STRING_GFLAGS_REGEXP)
+ {
+ literal_string = yr_re_extract_literal(re);
+
+ if (literal_string != NULL)
+ {
+ flags |= STRING_GFLAGS_LITERAL;
+ free_literal = TRUE;
+ }
+ }
+ else
+ {
+ literal_string = str;
+ flags |= STRING_GFLAGS_LITERAL;
+ }
+
+ (*string)->g_flags = flags;
+ (*string)->chained_to = NULL;
+
+ memset((*string)->matches, 0,
+ sizeof((*string)->matches));
+
+ memset((*string)->unconfirmed_matches, 0,
+ sizeof((*string)->unconfirmed_matches));
+
+ if (flags & STRING_GFLAGS_LITERAL)
+ {
+ (*string)->length = literal_string->length;
+
+ result = yr_arena_write_data(
+ compiler->sz_arena,
+ literal_string->c_string,
+ literal_string->length,
+ (void*) &(*string)->string);
+
+ if (result == ERROR_SUCCESS)
+ {
+ result = yr_atoms_extract_from_string(
+ (uint8_t*) literal_string->c_string,
+ literal_string->length,
+ flags,
+ &atom_list);
+ }
+ }
+ else
+ {
+ result = yr_re_emit_code(re, compiler->re_code_arena);
+
+ if (result == ERROR_SUCCESS)
+ result = yr_atoms_extract_from_re(re, flags, &atom_list);
+ }
+
+ if (result == ERROR_SUCCESS)
+ {
+ // Add the string to Aho-Corasick automaton.
+
+ if (atom_list != NULL)
+ {
+ result = yr_ac_add_string(
+ compiler->automaton_arena,
+ compiler->automaton,
+ *string,
+ atom_list);
+ }
+ else
+ {
+ result = yr_arena_allocate_struct(
+ compiler->automaton_arena,
+ sizeof(YR_AC_MATCH),
+ (void**) &new_match,
+ offsetof(YR_AC_MATCH, string),
+ offsetof(YR_AC_MATCH, forward_code),
+ offsetof(YR_AC_MATCH, backward_code),
+ offsetof(YR_AC_MATCH, next),
+ EOL);
+
+ if (result == ERROR_SUCCESS)
+ {
+ new_match->backtrack = 0;
+ new_match->string = *string;
+ new_match->forward_code = re->root_node->forward_code;
+ new_match->backward_code = NULL;
+ new_match->next = compiler->automaton->root->matches;
+ compiler->automaton->root->matches = new_match;
+ }
+ }
+ }
+
+ atom = atom_list;
+
+ if (atom != NULL)
+ *min_atom_length = MAX_ATOM_LENGTH;
+ else
+ *min_atom_length = 0;
+
+ while (atom != NULL)
+ {
+ if (atom->atom_length < *min_atom_length)
+ *min_atom_length = atom->atom_length;
+ atom = atom->next;
+ }
+
+ if (flags & STRING_GFLAGS_LITERAL)
+ {
+ if (flags & STRING_GFLAGS_WIDE)
+ max_string_len = (*string)->length * 2;
+ else
+ max_string_len = (*string)->length;
+
+ if (max_string_len == *min_atom_length)
+ (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
+ }
+
+ if (free_literal)
+ yr_free(literal_string);
+
+ if (atom_list != NULL)
+ yr_atoms_list_destroy(atom_list);
+
+ return result;
+}
+
+#include <stdint.h>
+#include <limits.h>
+
+
+YR_STRING* yr_parser_reduce_string_declaration(
+ yyscan_t yyscanner,
+ int32_t flags,
+ const char* identifier,
+ SIZED_STRING* str)
+{
+ int min_atom_length;
+ int min_atom_length_aux;
+
+ char* file_name;
+ char message[512];
+
+ YR_COMPILER* compiler = yyget_extra(yyscanner);
+ YR_STRING* string = NULL;
+ YR_STRING* prev_string;
+
+ RE* re = NULL;
+ RE* remainder_re;
if (strcmp(identifier,"$") == 0)
flags |= STRING_GFLAGS_ANONYMOUS;
@@ -257,10 +402,6 @@ YR_STRING* yr_parser_reduce_string_declaration(
flags |= STRING_GFLAGS_SINGLE_MATCH;
- string->g_flags = flags;
-
- memset(string->matches, 0, sizeof(string->matches));
-
if (flags & STRING_GFLAGS_HEXADECIMAL ||
flags & STRING_GFLAGS_REGEXP)
{
@@ -282,127 +423,84 @@ YR_STRING* yr_parser_reduce_string_declaration(
identifier,
re->error_message);
- yr_compiler_set_error_extra_info(compiler, message);
- string = NULL;
+ yr_compiler_set_error_extra_info(
+ compiler, message);
+
goto _exit;
}
if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
- string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;
+ flags |= STRING_GFLAGS_FAST_HEX_REGEXP;
- if (re->flags & RE_FLAGS_LITERAL_STRING)
- {
- string->g_flags |= STRING_GFLAGS_LITERAL;
- literal_string = re->literal_string;
- literal_string_len = re->literal_string_len;
+ compiler->last_result = yr_re_split_at_chaining_point(
+ re, &re, &remainder_re);
- compiler->last_result = yr_atoms_extract_from_string(
- literal_string, literal_string_len, string->g_flags, &atom_list);
- }
- else
- {
- compiler->last_result = yr_re_emit_code(
- re, compiler->re_code_arena);
-
- if (compiler->last_result != ERROR_SUCCESS)
- {
- string = NULL;
- goto _exit;
- }
-
- compiler->last_result = yr_atoms_extract_from_re(
- re, string->g_flags, &atom_list);
- }
- }
- else
- {
- string->g_flags |= STRING_GFLAGS_LITERAL;
- literal_string = (uint8_t*) str->c_string;
- literal_string_len = str->length;
+ if (compiler->last_result != ERROR_SUCCESS)
+ goto _exit;
- compiler->last_result = yr_atoms_extract_from_string(
- literal_string, literal_string_len, string->g_flags, &atom_list);
- }
+ compiler->last_result = _yr_parser_write_string(
+ identifier,
+ flags,
+ compiler,
+ NULL,
+ re,
+ &string,
+ &min_atom_length);
- if (compiler->last_result != ERROR_SUCCESS)
- {
- string = NULL;
- goto _exit;
- }
+ if (compiler->last_result != ERROR_SUCCESS)
+ goto _exit;
- if (STRING_IS_LITERAL(string))
- {
- compiler->last_result = yr_arena_write_data(
- compiler->sz_arena,
- literal_string,
- literal_string_len,
- (void*) &string->string);
+ if (remainder_re != NULL)
+ string->g_flags |= STRING_GFLAGS_CHAIN_TAIL |
+ STRING_GFLAGS_CHAIN_PART;
- if (compiler->last_result != ERROR_SUCCESS)
+ while (remainder_re != NULL)
{
- string = NULL;
- goto _exit;
- }
+ // Destroy the regexp pointed to by 're' before yr_re_split_at_chaining_point
+ // overwrites 're' with another value.
- string->length = literal_string_len;
- }
+ yr_re_destroy(re);
- // Add the string to Aho-Corasick automaton.
+ compiler->last_result = yr_re_split_at_chaining_point(
+ remainder_re, &re, &remainder_re);
- if (atom_list != NULL)
- {
- compiler->last_result = yr_ac_add_string(
- compiler->automaton_arena,
- compiler->automaton,
- string,
- atom_list);
- }
- else
- {
- compiler->last_result = yr_arena_allocate_struct(
- compiler->automaton_arena,
- sizeof(YR_AC_MATCH),
- (void**) &new_match,
- offsetof(YR_AC_MATCH, string),
- offsetof(YR_AC_MATCH, forward_code),
- offsetof(YR_AC_MATCH, backward_code),
- offsetof(YR_AC_MATCH, next),
- EOL);
+ if (compiler->last_result != ERROR_SUCCESS)
+ goto _exit;
- if (compiler->last_result == ERROR_SUCCESS)
- {
- new_match->backtrack = 0;
- new_match->string = string;
- new_match->forward_code = re->root_node->forward_code;
- new_match->backward_code = NULL;
- new_match->next = compiler->automaton->root->matches;
- compiler->automaton->root->matches = new_match;
- }
- }
+ prev_string = string;
- atom = atom_list;
+ compiler->last_result = _yr_parser_write_string(
+ identifier,
+ flags,
+ compiler,
+ NULL,
+ re,
+ &string,
+ &min_atom_length_aux);
- if (atom != NULL)
- min_atom_length = MAX_ATOM_LENGTH;
- else
- min_atom_length = 0;
+ if (compiler->last_result != ERROR_SUCCESS)
+ goto _exit;
- while (atom != NULL)
- {
- if (atom->atom_length < min_atom_length)
- min_atom_length = atom->atom_length;
- atom = atom->next;
- }
+ if (min_atom_length_aux < min_atom_length)
+ min_atom_length = min_atom_length_aux;
- if (STRING_IS_LITERAL(string))
+ string->g_flags |= STRING_GFLAGS_CHAIN_PART;
+ prev_string->chained_to = string;
+ }
+ }
+ else
{
- if (STRING_IS_WIDE(string))
- max_string_len = string->length * 2;
- else
- max_string_len = string->length;
+ compiler->last_result = _yr_parser_write_string(
+ identifier,
+ flags,
+ compiler,
+ str,
+ NULL,
+ &string,
+ &min_atom_length);
- if (max_string_len == min_atom_length)
- string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
+ if (compiler->last_result != ERROR_SUCCESS)
+ goto _exit;
}
if (compiler->file_name_stack_ptr > 0)
@@ -426,17 +524,14 @@ YR_STRING* yr_parser_reduce_string_declaration(
message);
}
- if (compiler->last_result != ERROR_SUCCESS)
- string = NULL;
-
_exit:
- if (atom_list != NULL)
- yr_atoms_list_destroy(atom_list);
-
if (re != NULL)
yr_re_destroy(re);
+ if (compiler->last_result != ERROR_SUCCESS)
+ return NULL;
+
return string;
}
diff --git a/libyara/re.c b/libyara/re.c
index 3a2b5f8..f6e69b9 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -28,6 +28,7 @@ order to avoid confusion with operating system threads.
#include <assert.h>
#include <ctype.h>
#include <string.h>
+#include <limits.h>
#ifdef WIN32
#include <windows.h>
@@ -215,16 +216,6 @@ int yr_re_create(
if (*re == NULL)
return ERROR_INSUFICIENT_MEMORY;
- (*re)->literal_string_len = 0;
- (*re)->literal_string_max = 128;
- (*re)->literal_string = yr_malloc(128);
-
- if ((*re)->literal_string == NULL)
- {
- yr_free(*re);
- return ERROR_INSUFICIENT_MEMORY;
- }
-
(*re)->flags = 0;
(*re)->root_node = NULL;
(*re)->error_message = NULL;
@@ -243,9 +234,6 @@ void yr_re_destroy(
if (re->error_message != NULL)
yr_free((char*) re->error_message);
- if (re->literal_string != NULL)
- yr_free(re->literal_string);
-
yr_free(re);
}
@@ -265,6 +253,123 @@ int yr_re_compile_hex(
return yr_parse_hex_string(hex_string, re);
}
+//
+// yr_re_extract_literal
+//
+// Checks whether the provided regular expression is just a literal string
+// like "abc" or "12345", without any wildcard, operator, etc. If so, the
+// string is returned as a newly allocated SIZED_STRING. Returns NULL if the
+// regular expression is empty, if it is not a plain literal, or if the
+// memory for the result can't be allocated.
+//
+// The caller is responsible for deallocating the returned SIZED_STRING by
+// calling yr_free.
+//
+
+SIZED_STRING* yr_re_extract_literal(
+    RE* re)
+{
+  SIZED_STRING* string;
+  RE_NODE* node = re->root_node;
+
+  int i, length = 0;
+  char tmp;
+
+  // A plain literal is a left-leaning chain of RE_NODE_CONCAT nodes whose
+  // right children are all RE_NODE_LITERAL, ending in a single
+  // RE_NODE_LITERAL. Walk the chain counting one character per node.
+
+  while (node != NULL)
+  {
+    length++;
+
+    if (node->type == RE_NODE_LITERAL)
+      break;
+
+    if (node->type != RE_NODE_CONCAT)
+      return NULL;
+
+    if (node->right == NULL ||
+        node->right->type != RE_NODE_LITERAL)
+      return NULL;
+
+    node = node->left;
+  }
+
+  // The chain must end in a literal. A NULL node here means the tree was
+  // empty or a concatenation had no left child, and the copy loop below
+  // would dereference that NULL pointer.
+
+  if (node == NULL)
+    return NULL;
+
+  string = yr_malloc(sizeof(SIZED_STRING) + length);
+
+  // Check the allocation before writing anything through the pointer.
+
+  if (string == NULL)
+    return NULL;
+
+  string->length = 0;
+
+  node = re->root_node;
+
+  // Copy the characters. The chain is walked from the last character to the
+  // first one, so the string is written in reverse order.
+
+  while (node->type == RE_NODE_CONCAT)
+  {
+    string->c_string[string->length++] = node->right->value;
+    node = node->left;
+  }
+
+  string->c_string[string->length++] = node->value;
+
+  // The string ends up reversed. Reverse it back to its original value.
+
+  for (i = 0; i < length / 2; i++)
+  {
+    tmp = string->c_string[i];
+    string->c_string[i] = string->c_string[length - i - 1];
+    string->c_string[length - i - 1] = tmp;
+  }
+
+  return string;
+}
+
+
+//
+// yr_re_split_at_chaining_point
+//
+// Walks down the left branch of the RE tree, starting at the root, looking
+// for a RE_NODE_CONCAT node ("child") whose right child is a non-greedy
+// RE_NODE_RANGE over RE_NODE_ANY going from 0 to INT_MAX. At the first such
+// node found the tree is split in two:
+//
+//   *remainder_re  a newly created RE whose root is the sub-tree that was
+//                  at the left of "child".
+//   *result_re     the original RE, where the concatenation holding
+//                  "child" as its left child ("node") has been replaced by
+//                  its own right child.
+//
+// If no such node is found, *result_re is the original RE, left untouched,
+// and *remainder_re is NULL.
+//
+// Returns ERROR_SUCCESS, or the error code returned by yr_re_create.
+//
+// NOTE(review): re->root_node is dereferenced without a NULL check, so the
+// caller presumably must pass a successfully parsed RE. To be confirmed.
+//
+
+int yr_re_split_at_chaining_point(
+ RE* re,
+ RE** result_re,
+ RE** remainder_re)
+{
+ RE_NODE* node = re->root_node;
+ RE_NODE* child = re->root_node->left;
+ RE_NODE* parent = NULL;
+
+ int result;
+
+ *result_re = re;
+ *remainder_re = NULL;
+
+ while (child != NULL && child->type == RE_NODE_CONCAT)
+ {
+ if (child->right != NULL &&
+ child->right->type == RE_NODE_RANGE &&
+ child->right->greedy == FALSE &&
+ child->right->start == 0 &&
+ child->right->end == INT_MAX &&
+ child->right->left->type == RE_NODE_ANY)
+ {
+ result = yr_re_create(remainder_re);
+
+ if (result != ERROR_SUCCESS)
+ return result;
+
+ // Everything at the left of the range node goes to the remainder RE.
+ // The sub-tree is detached from "child", presumably so that the call
+ // to yr_re_node_destroy below doesn't free it.
+
+ (*remainder_re)->root_node = child->left;
+
+ child->left = NULL;
+
+ // Put the right child of "node" in the place of "node", either in its
+ // parent or as the new root of the original RE.
+
+ if (parent != NULL)
+ parent->left = node->right;
+ else
+ (*result_re)->root_node = node->right;
+
+ // The right child was relinked above, detach it before destroying
+ // "node". "child" and its range node are still attached to "node".
+
+ node->right = NULL;
+ yr_re_node_destroy(node);
+
+ return ERROR_SUCCESS;
+ }
+
+ // No chaining point here, go one level down the left branch.
+
+ parent = node;
+ node = child;
+ child = child->left;
+ }
+
+ return ERROR_SUCCESS;
+}
+
int _yr_emit_inst(
YR_ARENA* arena,
diff --git a/libyara/re.h b/libyara/re.h
index 3e12cd2..f1a0dae 100644
--- a/libyara/re.h
+++ b/libyara/re.h
@@ -18,6 +18,7 @@ limitations under the License.
#define _RE_H
#include "yara.h"
+#include "sizedstr.h"
#define RE_NODE_LITERAL 1
#define RE_NODE_MASKED_LITERAL 2
@@ -61,7 +62,7 @@ limitations under the License.
#define RE_OPCODE_JNZ 0xB4
#define RE_OPCODE_JUMP 0xB5
-#define RE_FLAGS_LITERAL_STRING 0x01
+
#define RE_FLAGS_FAST_HEX_REGEXP 0x02
#define RE_FLAGS_BACKWARDS 0x04
#define RE_FLAGS_EXHAUSTIVE 0x08
@@ -113,11 +114,6 @@ struct RE {
const char* error_message;
int error_code;
-
- uint8_t* literal_string;
-
- int literal_string_len;
- int literal_string_max;
};
@@ -166,10 +162,21 @@ void yr_re_node_destroy(
RE_NODE* node);
+SIZED_STRING* yr_re_extract_literal(
+ RE* re);
+
+
+int yr_re_split_at_chaining_point(
+ RE* re,
+ RE** result_re,
+ RE** remainder_re);
+
+
int yr_re_emit_code(
RE* re,
YR_ARENA* arena);
+
int yr_re_exec(
uint8_t* code,
uint8_t* input,
diff --git a/libyara/re_grammar.c b/libyara/re_grammar.c
index 244f18c..29f9778 100644
--- a/libyara/re_grammar.c
+++ b/libyara/re_grammar.c
@@ -126,11 +126,6 @@
yydebug = 1;
#endif
-
-#define mark_as_not_literal() \
- ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
-
-
#define ERROR_IF(x, error) \
if (x) \
{ \
@@ -167,7 +162,7 @@ yydebug = 1;
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
-#line 72 "re_grammar.y"
+#line 67 "re_grammar.y"
{
int integer;
uint32_t range;
@@ -175,7 +170,7 @@ typedef union YYSTYPE
uint8_t* class_vector;
}
/* Line 193 of yacc.c. */
-#line 179 "re_grammar.c"
+#line 174 "re_grammar.c"
YYSTYPE;
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1
@@ -188,7 +183,7 @@ typedef union YYSTYPE
/* Line 216 of yacc.c. */
-#line 192 "re_grammar.c"
+#line 187 "re_grammar.c"
#ifdef short
# undef short
@@ -479,9 +474,9 @@ static const yytype_int8 yyrhs[] =
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
static const yytype_uint16 yyrline[] =
{
- 0, 101, 101, 106, 109, 113, 123, 139, 143, 153,
- 161, 171, 179, 189, 200, 212, 223, 227, 234, 243,
- 247, 254, 277, 284, 291, 298, 305, 312, 319
+ 0, 96, 96, 101, 104, 108, 117, 132, 136, 146,
+ 153, 162, 169, 178, 188, 199, 209, 213, 219, 227,
+ 231, 237, 245, 251, 257, 263, 269, 275, 281
};
#endif
@@ -1110,29 +1105,29 @@ yydestruct (yymsg, yytype, yyvaluep, yyscanner, lex_env)
switch (yytype)
{
case 6: /* "_CLASS_" */
-#line 93 "re_grammar.y"
+#line 88 "re_grammar.y"
{ yr_free((yyvaluep->class_vector)); };
-#line 1116 "re_grammar.c"
+#line 1111 "re_grammar.c"
break;
case 24: /* "alternative" */
-#line 94 "re_grammar.y"
+#line 89 "re_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1121 "re_grammar.c"
+#line 1116 "re_grammar.c"
break;
case 25: /* "concatenation" */
-#line 95 "re_grammar.y"
+#line 90 "re_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1126 "re_grammar.c"
+#line 1121 "re_grammar.c"
break;
case 26: /* "repeat" */
-#line 96 "re_grammar.y"
+#line 91 "re_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1131 "re_grammar.c"
+#line 1126 "re_grammar.c"
break;
case 27: /* "single" */
-#line 97 "re_grammar.y"
+#line 92 "re_grammar.y"
{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1136 "re_grammar.c"
+#line 1131 "re_grammar.c"
break;
default:
@@ -1442,7 +1437,7 @@ yyreduce:
switch (yyn)
{
case 2:
-#line 102 "re_grammar.y"
+#line 97 "re_grammar.y"
{
RE* re = yyget_extra(yyscanner);
re->root_node = (yyvsp[(1) - (1)].re_node);
@@ -1450,16 +1445,15 @@ yyreduce:
break;
case 4:
-#line 110 "re_grammar.y"
+#line 105 "re_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 5:
-#line 114 "re_grammar.y"
+#line 109 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_ALT, (yyvsp[(1) - (3)].re_node), (yyvsp[(3) - (3)].re_node));
DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (3)].re_node));
@@ -1470,11 +1464,10 @@ yyreduce:
break;
case 6:
-#line 124 "re_grammar.y"
+#line 118 "re_grammar.y"
{
RE_NODE* node;
- mark_as_not_literal();
node = yr_re_node_create(RE_NODE_EMPTY, NULL, NULL);
DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
@@ -1487,14 +1480,14 @@ yyreduce:
break;
case 7:
-#line 140 "re_grammar.y"
+#line 133 "re_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 8:
-#line 144 "re_grammar.y"
+#line 137 "re_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
@@ -1505,9 +1498,8 @@ yyreduce:
break;
case 9:
-#line 154 "re_grammar.y"
+#line 147 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_STAR, (yyvsp[(1) - (2)].re_node), NULL);
DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
@@ -1516,9 +1508,8 @@ yyreduce:
break;
case 10:
-#line 162 "re_grammar.y"
+#line 154 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_STAR, (yyvsp[(1) - (3)].re_node), NULL);
DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (3)].re_node));
@@ -1529,9 +1520,8 @@ yyreduce:
break;
case 11:
-#line 172 "re_grammar.y"
+#line 163 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_PLUS, (yyvsp[(1) - (2)].re_node), NULL);
DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
@@ -1540,9 +1530,8 @@ yyreduce:
break;
case 12:
-#line 180 "re_grammar.y"
+#line 170 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_PLUS, (yyvsp[(1) - (3)].re_node), NULL);
DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (3)].re_node));
@@ -1553,9 +1542,8 @@ yyreduce:
break;
case 13:
-#line 190 "re_grammar.y"
+#line 179 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (2)].re_node), NULL);
DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
@@ -1567,9 +1555,8 @@ yyreduce:
break;
case 14:
-#line 201 "re_grammar.y"
+#line 189 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (3)].re_node), NULL);
DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (3)].re_node));
@@ -1582,9 +1569,8 @@ yyreduce:
break;
case 15:
-#line 213 "re_grammar.y"
+#line 200 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (2)].re_node), NULL);
DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
@@ -1596,16 +1582,15 @@ yyreduce:
break;
case 16:
-#line 224 "re_grammar.y"
+#line 210 "re_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 17:
-#line 228 "re_grammar.y"
+#line 214 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_START, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1613,9 +1598,8 @@ yyreduce:
break;
case 18:
-#line 235 "re_grammar.y"
+#line 220 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_END, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1623,16 +1607,15 @@ yyreduce:
break;
case 19:
-#line 244 "re_grammar.y"
+#line 228 "re_grammar.y"
{
(yyval.re_node) = (yyvsp[(2) - (3)].re_node);
}
break;
case 20:
-#line 248 "re_grammar.y"
+#line 232 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1640,35 +1623,19 @@ yyreduce:
break;
case 21:
-#line 255 "re_grammar.y"
+#line 238 "re_grammar.y"
{
- RE* re = yyget_extra(yyscanner);
-
(yyval.re_node) = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
(yyval.re_node)->value = (yyvsp[(1) - (1)].integer);
-
- if (re->literal_string_len == re->literal_string_max)
- {
- re->literal_string_max *= 2;
- re->literal_string = yr_realloc(
- re->literal_string,
- re->literal_string_max);
-
- ERROR_IF(re->literal_string == NULL, ERROR_INSUFICIENT_MEMORY);
- }
-
- re->literal_string[re->literal_string_len] = (yyvsp[(1) - (1)].integer);
- re->literal_string_len++;
}
break;
case 22:
-#line 278 "re_grammar.y"
+#line 246 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_WORD_CHAR, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1676,9 +1643,8 @@ yyreduce:
break;
case 23:
-#line 285 "re_grammar.y"
+#line 252 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_NON_WORD_CHAR, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1686,9 +1652,8 @@ yyreduce:
break;
case 24:
-#line 292 "re_grammar.y"
+#line 258 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_SPACE, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1696,9 +1661,8 @@ yyreduce:
break;
case 25:
-#line 299 "re_grammar.y"
+#line 264 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_NON_SPACE, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1706,9 +1670,8 @@ yyreduce:
break;
case 26:
-#line 306 "re_grammar.y"
+#line 270 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_DIGIT, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1716,9 +1679,8 @@ yyreduce:
break;
case 27:
-#line 313 "re_grammar.y"
+#line 276 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_NON_DIGIT, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1726,9 +1688,8 @@ yyreduce:
break;
case 28:
-#line 320 "re_grammar.y"
+#line 282 "re_grammar.y"
{
- mark_as_not_literal();
(yyval.re_node) = yr_re_node_create(RE_NODE_CLASS, NULL, NULL);
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1739,7 +1700,7 @@ yyreduce:
/* Line 1267 of yacc.c. */
-#line 1743 "re_grammar.c"
+#line 1704 "re_grammar.c"
default: break;
}
YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1953,7 +1914,7 @@ yyreturn:
}
-#line 331 "re_grammar.y"
+#line 292 "re_grammar.y"
diff --git a/libyara/re_grammar.h b/libyara/re_grammar.h
index d4095f6..08e72c7 100644
--- a/libyara/re_grammar.h
+++ b/libyara/re_grammar.h
@@ -68,7 +68,7 @@
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
-#line 72 "re_grammar.y"
+#line 67 "re_grammar.y"
{
int integer;
uint32_t range;
diff --git a/libyara/re_grammar.y b/libyara/re_grammar.y
index 268677e..5d8c59a 100644
--- a/libyara/re_grammar.y
+++ b/libyara/re_grammar.y
@@ -37,11 +37,6 @@ limitations under the License.
yydebug = 1;
#endif
-
-#define mark_as_not_literal() \
- ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
-
-
#define ERROR_IF(x, error) \
if (x) \
{ \
@@ -112,7 +107,6 @@ alternative : concatenation
}
| alternative '|' concatenation
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_ALT, $1, $3);
DESTROY_NODE_IF($$ == NULL, $1);
@@ -124,7 +118,6 @@ alternative : concatenation
{
RE_NODE* node;
- mark_as_not_literal();
node = yr_re_node_create(RE_NODE_EMPTY, NULL, NULL);
DESTROY_NODE_IF($$ == NULL, $1);
@@ -152,7 +145,6 @@ concatenation : repeat
repeat : single '*'
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_STAR, $1, NULL);
DESTROY_NODE_IF($$ == NULL, $1);
@@ -160,7 +152,6 @@ repeat : single '*'
}
| single '*' '?'
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_STAR, $1, NULL);
DESTROY_NODE_IF($$ == NULL, $1);
@@ -170,7 +161,6 @@ repeat : single '*'
}
| single '+'
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_PLUS, $1, NULL);
DESTROY_NODE_IF($$ == NULL, $1);
@@ -178,7 +168,6 @@ repeat : single '*'
}
| single '+' '?'
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_PLUS, $1, NULL);
DESTROY_NODE_IF($$ == NULL, $1);
@@ -188,7 +177,6 @@ repeat : single '*'
}
| single '?'
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_RANGE, $1, NULL);
DESTROY_NODE_IF($$ == NULL, $1);
@@ -199,7 +187,6 @@ repeat : single '*'
}
| single '?' '?'
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_RANGE, $1, NULL);
DESTROY_NODE_IF($$ == NULL, $1);
@@ -211,7 +198,6 @@ repeat : single '*'
}
| single _RANGE_
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_RANGE, $1, NULL);
DESTROY_NODE_IF($$ == NULL, $1);
@@ -226,14 +212,12 @@ repeat : single '*'
}
| '^'
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_ANCHOR_START, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
}
| '$'
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_ANCHOR_END, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -246,79 +230,56 @@ single : '(' alternative ')'
}
| '.'
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
}
| _CHAR_
{
- RE* re = yyget_extra(yyscanner);
-
$$ = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
$$->value = $1;
-
- if (re->literal_string_len == re->literal_string_max)
- {
- re->literal_string_max *= 2;
- re->literal_string = yr_realloc(
- re->literal_string,
- re->literal_string_max);
-
- ERROR_IF(re->literal_string == NULL, ERROR_INSUFICIENT_MEMORY);
- }
-
- re->literal_string[re->literal_string_len] = $1;
- re->literal_string_len++;
}
| _WORD_CHAR_
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_WORD_CHAR, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
}
| _NON_WORD_CHAR_
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_NON_WORD_CHAR, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
}
| _SPACE_
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_SPACE, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
}
| _NON_SPACE_
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_NON_SPACE, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
}
| _DIGIT_
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_DIGIT, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
}
| _NON_DIGIT_
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_NON_DIGIT, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
}
| _CLASS_
{
- mark_as_not_literal();
$$ = yr_re_node_create(RE_NODE_CLASS, NULL, NULL);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c
index 6d6e1c4..9d861cb 100644
--- a/libyara/re_lexer.c
+++ b/libyara/re_lexer.c
@@ -2504,14 +2504,6 @@ int yr_parse_re_string(
FAIL_ON_ERROR(yr_re_create(re));
- // The RE_FLAGS_LITERAL_STRING flag indicates that the
- // regular expression is just a literal string and it can
- // be matched by doing a simple string comparison, without
- // executing any regular expression code. We initially set
- // this flag which is unset later during parsing if necessary.
-
- (*re)->flags |= RE_FLAGS_LITERAL_STRING;
-
#ifdef WIN32
TlsSetValue(recovery_state_key, (LPVOID) &recovery_state);
#else
diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l
index a504377..4f5867d 100644
--- a/libyara/re_lexer.l
+++ b/libyara/re_lexer.l
@@ -466,14 +466,6 @@ int yr_parse_re_string(
FAIL_ON_ERROR(yr_re_create(re));
- // The RE_FLAGS_LITERAL_STRING flag indicates that the
- // regular expression is just a literal string and it can
- // be matched by doing a simple string comparison, without
- // executing any regular expression code. We initially set
- // this flag which is unset later during parsing if necessary.
-
- (*re)->flags |= RE_FLAGS_LITERAL_STRING;
-
#ifdef WIN32
TlsSetValue(recovery_state_key, (LPVOID) &recovery_state);
#else
diff --git a/libyara/rules.c b/libyara/rules.c
index b0d6fdc..e12b76f 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -300,7 +300,60 @@ int _yr_scan_fast_hex_re_exec(
return -1;
}
-void match_callback(
+//
+// _yr_scan_confirm_matches
+//
+// Called with a match of "string" found at match_offset and spanning
+// match_length bytes. If "string" is chained to another string, every
+// unconfirmed match of string->chained_to that ends at or before
+// match_offset is moved to the tail of the list of matches of
+// string->chained_to, and its length is extended to reach the end of the
+// match of "string". The function then calls itself with the match just
+// moved, so the same is done for the string that string->chained_to is
+// chained to, if any.
+//
+// tidx selects the entry to use in the matches and unconfirmed_matches
+// arrays. Does nothing if string->chained_to is NULL.
+//
+
+void _yr_scan_confirm_matches(
+ int tidx,
+ YR_STRING* string,
+ size_t match_offset,
+ int match_length)
+{
+ YR_MATCH* match;
+ YR_MATCH* next_match;
+
+ if (string->chained_to == NULL)
+ return;
+
+ match = string->chained_to->unconfirmed_matches[tidx].head;
+
+ while (match != NULL)
+ {
+ // match->next is overwritten when the match is moved to the other list,
+ // keep the next item of the unconfirmed list before that happens.
+
+ next_match = match->next;
+
+ if (match_offset >= match->first_offset + match->length)
+ {
+ // Unlink the match from the list of unconfirmed matches, updating
+ // the head and tail of the list if required.
+
+ if (match->prev != NULL)
+ match->prev->next = match->next;
+
+ if (match->next != NULL)
+ match->next->prev = match->prev;
+
+ if (match == string->chained_to->unconfirmed_matches[tidx].head)
+ string->chained_to->unconfirmed_matches[tidx].head = match->next;
+
+ if (match == string->chained_to->unconfirmed_matches[tidx].tail)
+ string->chained_to->unconfirmed_matches[tidx].tail = match->prev;
+
+ // Append the match to the tail of the list of matches. The new length
+ // goes from the first offset of this match to the end of the match
+ // received as argument.
+
+ match->prev = string->chained_to->matches[tidx].tail;
+ match->next = NULL;
+ match->length = match_offset - match->first_offset + match_length;
+
+ if (string->chained_to->matches[tidx].head == NULL)
+ string->chained_to->matches[tidx].head = match;
+
+ if (string->chained_to->matches[tidx].tail != NULL)
+ string->chained_to->matches[tidx].tail->next = match;
+
+ string->chained_to->matches[tidx].tail = match;
+
+ // Repeat the process for the next string in the chain.
+
+ _yr_scan_confirm_matches(
+ tidx, string->chained_to, match->first_offset, match->length);
+ }
+
+ match = next_match;
+ }
+}
+
+
+void _yr_rules_match_callback(
uint8_t* match_data,
int match_length,
int flags,
@@ -308,6 +361,7 @@ void match_callback(
{
YR_MATCH* new_match;
YR_MATCH* match;
+ YR_MATCHES* matches;
CALLBACK_ARGS* callback_args = args;
YR_STRING* string = callback_args->string;
@@ -358,7 +412,18 @@ void match_callback(
}
}
- match = string->matches[tidx].tail;
+ if (STRING_IS_CHAIN_TAIL(string))
+ {
+ _yr_scan_confirm_matches(tidx, string, match_offset, match_length);
+ return;
+ }
+
+ if (STRING_IS_CHAIN_PART(string))
+ matches = &string->unconfirmed_matches[tidx];
+ else
+ matches = &string->matches[tidx];
+
+ match = matches->tail;
while (match != NULL)
{
@@ -397,34 +462,29 @@ void match_callback(
new_match->first_offset = match_offset;
new_match->last_offset = match_offset;
new_match->length = match_length;
+ new_match->data = match_data;
if (match != NULL)
{
new_match->next = match->next;
+ new_match->prev = match;
match->next = new_match;
}
else
{
- new_match->next = string->matches[tidx].head;
- string->matches[tidx].head = new_match;
+ new_match->next = matches->head;
+ matches->head = new_match;
}
if (new_match->next != NULL)
new_match->next->prev = new_match;
else
- string->matches[tidx].tail = new_match;
+ matches->tail = new_match;
new_match->prev = match;
- //TODO: handle errors
- yr_arena_write_data(
- callback_args->matches_arena,
- match_data,
- match_length,
- (void**) &new_match->data);
}
-
typedef int (*RE_EXEC_FUNC)(
uint8_t* code,
uint8_t* input,
@@ -503,12 +563,12 @@ int _yr_scan_verify_re_match(
data + offset,
offset + 1,
flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
- match_callback,
+ _yr_rules_match_callback,
(void*) &callback_args);
}
else
{
- match_callback(
+ _yr_rules_match_callback(
data + offset, 0, flags, &callback_args);
}
@@ -617,7 +677,7 @@ int _yr_scan_verify_literal_match(
callback_args.full_word = STRING_IS_FULL_WORD(string);
callback_args.tidx = yr_get_tidx();
- match_callback(
+ _yr_rules_match_callback(
data + offset, 0, flags, &callback_args);
}
@@ -783,6 +843,8 @@ void _yr_rules_clean_matches(
{
string->matches[tidx].head = NULL;
string->matches[tidx].tail = NULL;
+ string->unconfirmed_matches[tidx].head = NULL;
+ string->unconfirmed_matches[tidx].tail = NULL;
string++;
}
diff --git a/libyara/yara.h b/libyara/yara.h
index b0eec6c..3512495 100644
--- a/libyara/yara.h
+++ b/libyara/yara.h
@@ -56,40 +56,35 @@ typedef pthread_mutex_t mutex_t;
#endif
#define ERROR_INSUFICIENT_MEMORY 1
-#define ERROR_DUPLICATE_RULE_IDENTIFIER 2
-#define ERROR_INVALID_HEX_STRING 3
-#define ERROR_UNDEFINED_STRING 4
-#define ERROR_UNDEFINED_IDENTIFIER 5
-#define ERROR_COULD_NOT_OPEN_FILE 6
-#define ERROR_INVALID_REGULAR_EXPRESSION 7
-#define ERROR_SYNTAX_ERROR 8
-#define ERROR_DUPLICATE_TAG_IDENTIFIER 9
-#define ERROR_UNREFERENCED_STRING 10
-#define ERROR_DUPLICATE_STRING_IDENTIFIER 11
-#define ERROR_CALLBACK_ERROR 12
-#define ERROR_MISPLACED_OR_OPERATOR 13
-#define ERROR_INVALID_OR_OPERATION_SYNTAX 14
-#define ERROR_SKIP_INSIDE_OR_OPERATION 15
-#define ERROR_NESTED_OR_OPERATION 16
-#define ERROR_MISPLACED_ANONYMOUS_STRING 17
-#define ERROR_COULD_NOT_MAP_FILE 18
-#define ERROR_ZERO_LENGTH_FILE 19
-#define ERROR_INVALID_ARGUMENT 20
-#define ERROR_DUPLICATE_META_IDENTIFIER 21
+#define ERROR_COULD_NOT_ATTACH_TO_PROCESS 2
+#define ERROR_COULD_NOT_OPEN_FILE 3
+#define ERROR_COULD_NOT_MAP_FILE 4
+#define ERROR_ZERO_LENGTH_FILE 5
+#define ERROR_INVALID_FILE 6
+#define ERROR_CORRUPT_FILE 7
+#define ERROR_UNSUPPORTED_FILE_VERSION 8
+#define ERROR_INVALID_REGULAR_EXPRESSION 9
+#define ERROR_INVALID_HEX_STRING 10
+#define ERROR_SYNTAX_ERROR 11
+#define ERROR_LOOP_NESTING_LIMIT_EXCEEDED 12
+#define ERROR_DUPLICATE_LOOP_IDENTIFIER 13
+#define ERROR_DUPLICATE_RULE_IDENTIFIER 14
+#define ERROR_DUPLICATE_TAG_IDENTIFIER 15
+#define ERROR_DUPLICATE_META_IDENTIFIER 16
+#define ERROR_DUPLICATE_STRING_IDENTIFIER 17
+#define ERROR_UNREFERENCED_STRING 18
+#define ERROR_UNDEFINED_STRING 19
+#define ERROR_UNDEFINED_IDENTIFIER 20
+#define ERROR_MISPLACED_ANONYMOUS_STRING 21
#define ERROR_INCLUDES_CIRCULAR_REFERENCE 22
-#define ERROR_INCORRECT_VARIABLE_TYPE 23
-#define ERROR_COULD_NOT_ATTACH_TO_PROCESS 24
-#define ERROR_VECTOR_TOO_LONG 25
-#define ERROR_INCLUDE_DEPTH_EXCEEDED 26
-#define ERROR_INVALID_FILE 27
-#define ERROR_CORRUPT_FILE 28
-#define ERROR_UNSUPPORTED_FILE_VERSION 29
-#define ERROR_EXEC_STACK_OVERFLOW 30
-#define ERROR_SCAN_TIMEOUT 31
-#define ERROR_LOOP_NESTING_LIMIT_EXCEEDED 32
-#define ERROR_DUPLICATE_LOOP_IDENTIFIER 33
-#define ERROR_TOO_MANY_SCAN_THREADS 34
-#define ERROR_INTERNAL_FATAL_ERROR 35
+#define ERROR_INCLUDE_DEPTH_EXCEEDED 23
+#define ERROR_INCORRECT_VARIABLE_TYPE 24
+#define ERROR_EXEC_STACK_OVERFLOW 25
+#define ERROR_SCAN_TIMEOUT 26
+#define ERROR_TOO_MANY_SCAN_THREADS 27
+#define ERROR_CALLBACK_ERROR 28
+#define ERROR_INVALID_ARGUMENT 29
+#define ERROR_INTERNAL_FATAL_ERROR 30
#define CALLBACK_MSG_RULE_MATCHING 1
@@ -169,6 +164,8 @@ typedef pthread_mutex_t mutex_t;
#define STRING_GFLAGS_LITERAL 0x400
#define STRING_GFLAGS_FITS_IN_ATOM 0x800
#define STRING_GFLAGS_NULL 0x1000
+#define STRING_GFLAGS_CHAIN_PART 0x2000
+#define STRING_GFLAGS_CHAIN_TAIL 0x4000
#define STRING_IS_HEX(x) \
(((x)->g_flags) & STRING_GFLAGS_HEXADECIMAL)
@@ -203,6 +200,12 @@ typedef pthread_mutex_t mutex_t;
#define STRING_IS_FAST_HEX_REGEXP(x) \
(((x)->g_flags) & STRING_GFLAGS_FAST_HEX_REGEXP)
+#define STRING_IS_CHAIN_PART(x) \
+ (((x)->g_flags) & STRING_GFLAGS_CHAIN_PART)
+
+#define STRING_IS_CHAIN_TAIL(x) \
+ (((x)->g_flags) & STRING_GFLAGS_CHAIN_TAIL)
+
#define STRING_IS_NULL(x) \
((x) == NULL || ((x)->g_flags) & STRING_GFLAGS_NULL)
@@ -330,6 +333,14 @@ typedef struct _YR_META
} YR_META;
+// Head and tail of a list of YR_MATCH structures.
+
+typedef struct _YR_MATCHES
+{
+ DECLARE_REFERENCE(YR_MATCH*, head);
+ DECLARE_REFERENCE(YR_MATCH*, tail);
+
+} YR_MATCHES;
+
+
typedef struct _YR_STRING
{
int32_t g_flags;
@@ -337,11 +348,10 @@ typedef struct _YR_STRING
DECLARE_REFERENCE(char*, identifier);
DECLARE_REFERENCE(uint8_t*, string);
+ DECLARE_REFERENCE(struct _YR_STRING*, chained_to);
- struct {
- DECLARE_REFERENCE(YR_MATCH*, head);
- DECLARE_REFERENCE(YR_MATCH*, tail);
- } matches[MAX_THREADS];
+ YR_MATCHES matches[MAX_THREADS];
+ YR_MATCHES unconfirmed_matches[MAX_THREADS];
} YR_STRING;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list