[Forensics-changes] [yara] 229/415: Implement fast matching algorithm for hex strings and some other improvements
Hilko Bengen
bengen at moszumanska.debian.org
Thu Apr 3 05:43:09 UTC 2014
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to branch debian
in repository yara.
commit c6a39aa9bd827b04c2a285dc57ec171f49b450f9
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Wed Nov 20 14:07:46 2013 +0000
Implement fast matching algorithm for hex strings and some other improvements
---
libyara/Makefile.am | 2 +-
libyara/atoms.c | 2 +-
libyara/atoms.h | 2 +-
libyara/hex_grammar.c | 44 +++++++++-------
libyara/hex_grammar.h | 2 +-
libyara/hex_grammar.y | 6 +++
libyara/hex_lexer.c | 53 +++++++++----------
libyara/hex_lexer.l | 21 ++++----
libyara/parser.c | 16 +++---
libyara/re.c | 2 +
libyara/re.h | 20 ++++----
libyara/rules.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++++--
libyara/yara.h | 36 +++++++------
yara.c | 20 ++++----
14 files changed, 258 insertions(+), 107 deletions(-)
diff --git a/libyara/Makefile.am b/libyara/Makefile.am
index b345962..09eefbc 100644
--- a/libyara/Makefile.am
+++ b/libyara/Makefile.am
@@ -1,6 +1,6 @@
AM_YFLAGS=-d
-AM_CFLAGS=-g -O0 -std=gnu99
+AM_CFLAGS=-g -O4 -std=gnu99
ACLOCAL_AMFLAGS=-I m4
diff --git a/libyara/atoms.c b/libyara/atoms.c
index 03b6882..f474cfc 100644
--- a/libyara/atoms.c
+++ b/libyara/atoms.c
@@ -1034,7 +1034,7 @@ int yr_atoms_extract_from_re(
//
int yr_atoms_extract_from_string(
- char* string,
+ uint8_t* string,
int string_length,
int flags,
ATOM_LIST_ITEM** atoms)
diff --git a/libyara/atoms.h b/libyara/atoms.h
index f70af65..b46db85 100644
--- a/libyara/atoms.h
+++ b/libyara/atoms.h
@@ -59,7 +59,7 @@ int yr_atoms_extract_from_re(
ATOM_LIST_ITEM** atoms);
int yr_atoms_extract_from_string(
- char* string,
+ uint8_t* string,
int string_length,
int flags,
ATOM_LIST_ITEM** atoms);
diff --git a/libyara/hex_grammar.c b/libyara/hex_grammar.c
index 0b408ca..bba5dc6 100644
--- a/libyara/hex_grammar.c
+++ b/libyara/hex_grammar.c
@@ -105,6 +105,9 @@
#define mark_as_not_literal() \
((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
+#define mark_as_not_fast_hex_regexp() \
+ ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_FAST_HEX_REGEXP
+
#if YYDEBUG
yydebug = 1;
#endif
@@ -139,13 +142,13 @@ yydebug = 1;
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
-#line 58 "hex_grammar.y"
+#line 61 "hex_grammar.y"
{
int integer;
RE_NODE *re_node;
}
/* Line 193 of yacc.c. */
-#line 149 "hex_grammar.c"
+#line 152 "hex_grammar.c"
YYSTYPE;
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1
@@ -158,7 +161,7 @@ typedef union YYSTYPE
/* Line 216 of yacc.c. */
-#line 162 "hex_grammar.c"
+#line 165 "hex_grammar.c"
#ifdef short
# undef short
@@ -444,8 +447,8 @@ static const yytype_int8 yyrhs[] =
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
static const yytype_uint8 yyrline[] =
{
- 0, 71, 71, 79, 83, 91, 95, 99, 107, 120,
- 144, 148, 157, 180
+ 0, 74, 74, 82, 86, 94, 98, 102, 110, 123,
+ 147, 151, 162, 186
};
#endif
@@ -1369,7 +1372,7 @@ yyreduce:
switch (yyn)
{
case 2:
-#line 72 "hex_grammar.y"
+#line 75 "hex_grammar.y"
{
RE* re = yyget_extra(yyscanner);
re->root_node = (yyvsp[(2) - (3)].re_node);
@@ -1377,14 +1380,14 @@ yyreduce:
break;
case 3:
-#line 80 "hex_grammar.y"
+#line 83 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 4:
-#line 84 "hex_grammar.y"
+#line 87 "hex_grammar.y"
{
(yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1392,21 +1395,21 @@ yyreduce:
break;
case 5:
-#line 92 "hex_grammar.y"
+#line 95 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 6:
-#line 96 "hex_grammar.y"
+#line 99 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(2) - (3)].re_node);
}
break;
case 7:
-#line 100 "hex_grammar.y"
+#line 103 "hex_grammar.y"
{
mark_as_not_literal();
(yyval.re_node) = (yyvsp[(2) - (3)].re_node);
@@ -1414,7 +1417,7 @@ yyreduce:
break;
case 8:
-#line 108 "hex_grammar.y"
+#line 111 "hex_grammar.y"
{
RE_NODE* re_any;
@@ -1430,7 +1433,7 @@ yyreduce:
break;
case 9:
-#line 121 "hex_grammar.y"
+#line 124 "hex_grammar.y"
{
RE_NODE* re_any;
@@ -1454,16 +1457,18 @@ yyreduce:
break;
case 10:
-#line 145 "hex_grammar.y"
+#line 148 "hex_grammar.y"
{
(yyval.re_node) = (yyvsp[(1) - (1)].re_node);
}
break;
case 11:
-#line 149 "hex_grammar.y"
+#line 152 "hex_grammar.y"
{
mark_as_not_literal();
+ mark_as_not_fast_hex_regexp();
+
(yyval.re_node) = yr_re_node_create(RE_NODE_ALT, (yyvsp[(1) - (3)].re_node), (yyvsp[(3) - (3)].re_node));
ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1471,7 +1476,7 @@ yyreduce:
break;
case 12:
-#line 158 "hex_grammar.y"
+#line 163 "hex_grammar.y"
{
RE* re = yyget_extra(yyscanner);
@@ -1484,6 +1489,7 @@ yyreduce:
if (re->literal_string_len == re->literal_string_max)
{
re->literal_string_max *= 2;
+
re->literal_string = yr_realloc(
re->literal_string,
re->literal_string_max);
@@ -1497,7 +1503,7 @@ yyreduce:
break;
case 13:
-#line 181 "hex_grammar.y"
+#line 187 "hex_grammar.y"
{
uint8_t mask = (yyvsp[(1) - (1)].integer) >> 8;
@@ -1523,7 +1529,7 @@ yyreduce:
/* Line 1267 of yacc.c. */
-#line 1527 "hex_grammar.c"
+#line 1533 "hex_grammar.c"
default: break;
}
YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1737,7 +1743,7 @@ yyreturn:
}
-#line 204 "hex_grammar.y"
+#line 210 "hex_grammar.y"
diff --git a/libyara/hex_grammar.h b/libyara/hex_grammar.h
index 06ad3f0..6cd377e 100644
--- a/libyara/hex_grammar.h
+++ b/libyara/hex_grammar.h
@@ -54,7 +54,7 @@
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
-#line 58 "hex_grammar.y"
+#line 61 "hex_grammar.y"
{
int integer;
RE_NODE *re_node;
diff --git a/libyara/hex_grammar.y b/libyara/hex_grammar.y
index 4d46396..a67fde2 100644
--- a/libyara/hex_grammar.y
+++ b/libyara/hex_grammar.y
@@ -30,6 +30,9 @@ limitations under the License.
#define mark_as_not_literal() \
((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
+#define mark_as_not_fast_hex_regexp() \
+ ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_FAST_HEX_REGEXP
+
#if YYDEBUG
yydebug = 1;
#endif
@@ -148,6 +151,8 @@ alternatives : tokens
| alternatives '|' tokens
{
mark_as_not_literal();
+ mark_as_not_fast_hex_regexp();
+
$$ = yr_re_node_create(RE_NODE_ALT, $1, $3);
ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -167,6 +172,7 @@ byte : _BYTE_
if (re->literal_string_len == re->literal_string_max)
{
re->literal_string_max *= 2;
+
re->literal_string = yr_realloc(
re->literal_string,
re->literal_string_max);
diff --git a/libyara/hex_lexer.c b/libyara/hex_lexer.c
index 0efa4c8..e8f871b 100644
--- a/libyara/hex_lexer.c
+++ b/libyara/hex_lexer.c
@@ -489,11 +489,6 @@ limitations under the License.
#define snprintf _snprintf
#endif
-
-
-
-
-
#define ERROR_IF(x, error) \
if (x) \
{ \
@@ -504,7 +499,7 @@ limitations under the License.
#define YY_NO_UNISTD_H 1
-#line 508 "hex_lexer.c"
+#line 503 "hex_lexer.c"
#define INITIAL 0
#define range 1
@@ -738,11 +733,11 @@ YY_DECL
register int yy_act;
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-#line 66 "hex_lexer.l"
+#line 61 "hex_lexer.l"
-#line 746 "hex_lexer.c"
+#line 741 "hex_lexer.c"
yylval = yylval_param;
@@ -841,7 +836,7 @@ do_action: /* This label is used only to access EOF actions. */
case 1:
YY_RULE_SETUP
-#line 69 "hex_lexer.l"
+#line 64 "hex_lexer.l"
{
yylval->integer = xtoi(yytext);
@@ -850,7 +845,7 @@ YY_RULE_SETUP
YY_BREAK
case 2:
YY_RULE_SETUP
-#line 75 "hex_lexer.l"
+#line 70 "hex_lexer.l"
{
yytext[1] = '0'; // replace ? by 0
@@ -860,7 +855,7 @@ YY_RULE_SETUP
YY_BREAK
case 3:
YY_RULE_SETUP
-#line 82 "hex_lexer.l"
+#line 77 "hex_lexer.l"
{
yytext[0] = '0'; // replace ? by 0
@@ -870,7 +865,7 @@ YY_RULE_SETUP
YY_BREAK
case 4:
YY_RULE_SETUP
-#line 89 "hex_lexer.l"
+#line 84 "hex_lexer.l"
{
yylval->integer = 0x0000;
@@ -879,7 +874,7 @@ YY_RULE_SETUP
YY_BREAK
case 5:
YY_RULE_SETUP
-#line 95 "hex_lexer.l"
+#line 90 "hex_lexer.l"
{
BEGIN(range);
@@ -888,14 +883,14 @@ YY_RULE_SETUP
YY_BREAK
case 6:
YY_RULE_SETUP
-#line 101 "hex_lexer.l"
+#line 96 "hex_lexer.l"
{
return yytext[0];
}
YY_BREAK
case 7:
YY_RULE_SETUP
-#line 105 "hex_lexer.l"
+#line 100 "hex_lexer.l"
{
yylval->integer = atoi(yytext);
@@ -911,7 +906,7 @@ YY_RULE_SETUP
YY_BREAK
case 8:
YY_RULE_SETUP
-#line 118 "hex_lexer.l"
+#line 113 "hex_lexer.l"
{
BEGIN(INITIAL);
@@ -921,12 +916,12 @@ YY_RULE_SETUP
case 9:
/* rule 9 can match eol */
YY_RULE_SETUP
-#line 125 "hex_lexer.l"
+#line 120 "hex_lexer.l"
// skip whitespace
YY_BREAK
case 10:
YY_RULE_SETUP
-#line 128 "hex_lexer.l"
+#line 123 "hex_lexer.l"
{
if (yytext[0] >= 32 && yytext[0] < 127)
@@ -942,10 +937,10 @@ YY_RULE_SETUP
YY_BREAK
case 11:
YY_RULE_SETUP
-#line 141 "hex_lexer.l"
+#line 136 "hex_lexer.l"
ECHO;
YY_BREAK
-#line 949 "hex_lexer.c"
+#line 944 "hex_lexer.c"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(range):
yyterminate();
@@ -2123,7 +2118,7 @@ void hex_yyfree (void * ptr , yyscan_t yyscanner)
#define YYTABLES_NAME "yytables"
-#line 141 "hex_lexer.l"
+#line 136 "hex_lexer.l"
@@ -2149,13 +2144,19 @@ int yr_parse_hex_string(
FAIL_ON_ERROR(yr_re_create(re));
- // The RE_FLAGS_LITERAL_STRING flag indicates that the
- // regular expression is just a literal string and it can
- // be matched by doing a simple string comparison, without
- // executing any regular expression code. We initially set
- // this flag which is unset later during parsing if necessary.
+ // The RE_FLAGS_LITERAL_STRING flag indicates that the regular expression
+ // is just a literal string and it can be matched by doing a simple string
+ // comparison, without executing any regular expression code.
+ //
+ // The RE_FLAGS_FAST_HEX_REGEXP flag indicates a regular expression derived
+ // from a hex string that can be matched by a faster algorithm. These regular
+ // expressions come from hex strings not containing alternatives
+ // (like in 01 02 | 03 04).
+ //
+ // These flags are unset later during parsing if necessary.
(*re)->flags |= RE_FLAGS_LITERAL_STRING;
+ (*re)->flags |= RE_FLAGS_FAST_HEX_REGEXP;
hex_yylex_init(&yyscanner);
hex_yyset_extra(*re,yyscanner);
diff --git a/libyara/hex_lexer.l b/libyara/hex_lexer.l
index 43476ed..df4d5a0 100644
--- a/libyara/hex_lexer.l
+++ b/libyara/hex_lexer.l
@@ -31,11 +31,6 @@ limitations under the License.
#define snprintf _snprintf
#endif
-
-
-
-
-
#define ERROR_IF(x, error) \
if (x) \
{ \
@@ -162,13 +157,19 @@ int yr_parse_hex_string(
FAIL_ON_ERROR(yr_re_create(re));
- // The RE_FLAGS_LITERAL_STRING flag indicates that the
- // regular expression is just a literal string and it can
- // be matched by doing a simple string comparison, without
- // executing any regular expression code. We initially set
- // this flag which is unset later during parsing if necessary.
+ // The RE_FLAGS_LITERAL_STRING flag indicates that the regular expression
+ // is just a literal string and it can be matched by doing a simple string
+ // comparison, without executing any regular expression code.
+ //
+ // The RE_FLAGS_FAST_HEX_REGEXP flag indicates a regular expression derived
+ // from a hex string that can be matched by a faster algorithm. These regular
+ // expressions come from hex strings not containing alternatives
+ // (like in 01 02 | 03 04).
+ //
+ // These flags are unset later during parsing if necessary.
(*re)->flags |= RE_FLAGS_LITERAL_STRING;
+ (*re)->flags |= RE_FLAGS_FAST_HEX_REGEXP;
yylex_init(&yyscanner);
yyset_extra(*re, yyscanner);
diff --git a/libyara/parser.c b/libyara/parser.c
index a67a158..2984fb4 100644
--- a/libyara/parser.c
+++ b/libyara/parser.c
@@ -217,6 +217,7 @@ STRING* yr_parser_reduce_string_declaration(
RE* re = NULL;
uint8_t* literal_string;
+
int literal_string_len;
int max_string_len;
@@ -289,17 +290,15 @@ STRING* yr_parser_reduce_string_declaration(
goto _exit;
}
- //
- //yr_re_print(re);
- //printf("\n");
- //
-
if (re->flags & RE_FLAGS_START_ANCHORED)
string->g_flags |= STRING_GFLAGS_START_ANCHORED;
if (re->flags & RE_FLAGS_END_ANCHORED)
string->g_flags |= STRING_GFLAGS_END_ANCHORED;
+ if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
+ string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;
+
if (re->flags & RE_FLAGS_LITERAL_STRING)
{
string->g_flags |= STRING_GFLAGS_LITERAL;
@@ -307,10 +306,7 @@ STRING* yr_parser_reduce_string_declaration(
literal_string_len = re->literal_string_len;
compiler->last_result = yr_atoms_extract_from_string(
- re->literal_string,
- re->literal_string_len,
- string->g_flags,
- &atom_list);
+ literal_string, literal_string_len, string->g_flags, &atom_list);
}
else
{
@@ -334,7 +330,7 @@ STRING* yr_parser_reduce_string_declaration(
literal_string_len = str->length;
compiler->last_result = yr_atoms_extract_from_string(
- str->c_string, str->length, string->g_flags, &atom_list);
+ literal_string, literal_string_len, string->g_flags, &atom_list);
}
if (compiler->last_result != ERROR_SUCCESS)
diff --git a/libyara/re.c b/libyara/re.c
index cb10034..ad9335f 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -196,6 +196,8 @@ RE_NODE* yr_re_node_create(
result->left = left;
result->right = right;
result->greedy = TRUE;
+ result->forward_code = NULL;
+ result->backward_code = NULL;
}
return result;
diff --git a/libyara/re.h b/libyara/re.h
index 72d2519..5239d76 100644
--- a/libyara/re.h
+++ b/libyara/re.h
@@ -57,14 +57,16 @@ limitations under the License.
#define RE_OPCODE_JUMP 0xB5
-#define RE_FLAGS_START_ANCHORED 0x01
-#define RE_FLAGS_END_ANCHORED 0x02
-#define RE_FLAGS_LITERAL_STRING 0x04
-#define RE_FLAGS_BACKWARDS 0x08
-#define RE_FLAGS_EXHAUSTIVE 0x10
-#define RE_FLAGS_WIDE 0x20
-#define RE_FLAGS_NO_CASE 0x40
-#define RE_FLAGS_SCAN 0x80
+#define RE_FLAGS_START_ANCHORED 0x01
+#define RE_FLAGS_END_ANCHORED 0x02
+#define RE_FLAGS_LITERAL_STRING 0x04
+#define RE_FLAGS_FAST_HEX_REGEXP 0x08
+#define RE_FLAGS_BACKWARDS 0x10
+#define RE_FLAGS_EXHAUSTIVE 0x20
+#define RE_FLAGS_WIDE 0x40
+#define RE_FLAGS_NO_CASE 0x80
+#define RE_FLAGS_SCAN 0x100
+
typedef struct RE RE;
typedef struct RE_NODE RE_NODE;
@@ -110,10 +112,8 @@ struct RE {
int error_code;
int8_t* literal_string;
-
int literal_string_len;
int literal_string_max;
-
};
diff --git a/libyara/rules.c b/libyara/rules.c
index aebba1c..1e100b6 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -133,6 +133,123 @@ inline int _yr_scan_wicompare(
}
+int _yr_scan_fast_hex_re_exec( // Backtracking matcher that only understands LITERAL, MASKED_LITERAL, ANY, PUSH and MATCH opcodes.
+ uint8_t* code, // first opcode to execute
+ uint8_t* input, // byte where matching starts
+ size_t input_size, // NOTE(review): never read below, so no input access is bounds-checked against it
+ int flags, // RE_FLAGS_BACKWARDS and RE_FLAGS_EXHAUSTIVE are the bits tested here
+ RE_MATCH_CALLBACK_FUNC callback, // invoked once per match, only when RE_FLAGS_EXHAUSTIVE is set
+ void* callback_args) // handed to callback unchanged
+{ // Returns the match length at the first MATCH in non-exhaustive mode; otherwise -1 once all pending states are used up.
+ uint8_t* code_stack[100]; // the three arrays together form one stack of pending (code, input, matches) states
+ uint8_t* input_stack[100];
+ int matches_stack[100];
+ // NOTE(review): nothing below checks sp against the 100-entry capacity before pushing.
+ int sp = 0;
+
+ uint8_t* ip = code;
+ uint8_t* current_input = input;
+ uint8_t mask;
+ uint8_t value;
+
+ int i;
+ int matches; // number of input bytes consumed so far by the current state
+ int offset;
+ int stop;
+ int increment;
+ // Input is walked towards lower addresses when matching backwards.
+ increment = flags & RE_FLAGS_BACKWARDS ? -1 : 1;
+ // Seed the stack with the initial state.
+ code_stack[sp] = code;
+ input_stack[sp] = input;
+ matches_stack[sp] = 0;
+ sp++;
+
+ while (sp > 0)
+ {
+ sp--; // pop the most recently pushed state
+ ip = code_stack[sp];
+ current_input = input_stack[sp];
+ matches = matches_stack[sp];
+ stop = FALSE;
+
+ while(!stop) // run this state until a mismatch or an exhaustive-mode MATCH
+ {
+ switch(*ip)
+ {
+ case RE_OPCODE_LITERAL: // operand: one byte at ip + 1 that must equal the input byte
+ if (*current_input == *(ip + 1))
+ {
+ matches++;
+ current_input += increment;
+ ip += 2;
+ }
+ else
+ {
+ stop = TRUE;
+ }
+ break;
+
+ case RE_OPCODE_MASKED_LITERAL: // operand: 16 bits at ip + 1; low byte = expected value, high byte = mask
+ value = *(int16_t*)(ip + 1) & 0xFF;
+ mask = *(int16_t*)(ip + 1) >> 8;
+ if ((*current_input & mask) == value)
+ {
+ matches++;
+ current_input += increment;
+ ip += 3;
+ }
+ else
+ {
+ stop = TRUE;
+ }
+ break;
+
+ case RE_OPCODE_ANY: // consumes one input byte unconditionally
+ matches++;
+ current_input += increment;
+ ip += 1;
+ break;
+
+ case RE_OPCODE_PUSH: // operand: 16-bit count at ip + 1; queues skips of 1..count bytes, then carries on with a skip of 0
+ for (i = *(uint16_t*)(ip + 1); i > 0; i--)
+ {
+ offset = flags & RE_FLAGS_BACKWARDS ? -i : i;
+ code_stack[sp] = ip + 11; // queued states resume where this state continues; bytes ip + 3 .. ip + 10 are not read here
+ input_stack[sp] = current_input + offset;
+ matches_stack[sp] = matches + i; // skipped bytes count towards the match length
+ sp++;
+ }
+ // Pushed in descending order, so the smallest pending skip is popped first.
+ ip += 11;
+ break;
+
+ default: // any other opcode is unexpected here
+ assert(FALSE);
+ }
+ // MATCH is tested after every executed opcode rather than as a switch case.
+ if (*ip == RE_OPCODE_MATCH)
+ {
+ if (flags & RE_FLAGS_EXHAUSTIVE)
+ {
+ callback(
+ flags & RE_FLAGS_BACKWARDS ? current_input + 1 : input, // lowest address consumed when going backwards, else the starting byte
+ matches,
+ flags,
+ callback_args);
+ stop = TRUE; // report this match, then try the remaining queued states
+ }
+ else
+ {
+ return matches; // first match found ends the search
+ }
+ }
+ }
+ }
+
+ return -1; // stack emptied; also the result after exhaustive-mode callbacks
+}
+
void match_callback(
uint8_t* match_data,
int match_length,
@@ -264,6 +381,16 @@ void match_callback(
}
+
+typedef int (*RE_EXEC_FUNC)( // signature through which _yr_scan_verify_re_match calls either yr_re_exec or _yr_scan_fast_hex_re_exec
+ uint8_t* code,
+ uint8_t* input,
+ size_t input_size,
+ int flags,
+ RE_MATCH_CALLBACK_FUNC callback,
+ void* callback_args);
+
+
int _yr_scan_verify_re_match(
AC_MATCH* ac_match,
uint8_t* data,
@@ -272,10 +399,16 @@ int _yr_scan_verify_re_match(
ARENA* matches_arena)
{
CALLBACK_ARGS callback_args;
+ RE_EXEC_FUNC exec;
int forward_matches = -1;
int flags = 0;
+ if (STRING_IS_FAST_HEX_REGEXP(ac_match->string))
+ exec = _yr_scan_fast_hex_re_exec;
+ else
+ exec = yr_re_exec;
+
if (STRING_IS_START_ANCHORED(ac_match->string))
flags |= RE_FLAGS_START_ANCHORED;
@@ -287,7 +420,7 @@ int _yr_scan_verify_re_match(
if (STRING_IS_ASCII(ac_match->string))
{
- forward_matches = yr_re_exec(
+ forward_matches = exec(
ac_match->forward_code,
data + offset,
data_size - offset,
@@ -300,7 +433,7 @@ int _yr_scan_verify_re_match(
forward_matches < 0)
{
flags |= RE_FLAGS_WIDE;
- forward_matches = yr_re_exec(
+ forward_matches = exec(
ac_match->forward_code,
data + offset,
data_size - offset,
@@ -325,7 +458,7 @@ int _yr_scan_verify_re_match(
if (ac_match->backward_code != NULL)
{
- yr_re_exec(
+ exec(
ac_match->backward_code,
data + offset,
offset + 1,
diff --git a/libyara/yara.h b/libyara/yara.h
index 1afb520..fb16074 100644
--- a/libyara/yara.h
+++ b/libyara/yara.h
@@ -152,22 +152,23 @@ typedef pthread_mutex_t mutex_t;
((x) != NULL ? (x)->type == EXTERNAL_VARIABLE_TYPE_NULL : TRUE)
-#define STRING_TFLAGS_FOUND 0x01
-
-#define STRING_GFLAGS_REFERENCED 0x01
-#define STRING_GFLAGS_HEXADECIMAL 0x02
-#define STRING_GFLAGS_NO_CASE 0x04
-#define STRING_GFLAGS_ASCII 0x08
-#define STRING_GFLAGS_WIDE 0x10
-#define STRING_GFLAGS_REGEXP 0x20
-#define STRING_GFLAGS_FULL_WORD 0x40
-#define STRING_GFLAGS_ANONYMOUS 0x80
-#define STRING_GFLAGS_SINGLE_MATCH 0x100
-#define STRING_GFLAGS_LITERAL 0x200
-#define STRING_GFLAGS_START_ANCHORED 0x400
-#define STRING_GFLAGS_END_ANCHORED 0x800
-#define STRING_GFLAGS_FITS_IN_ATOM 0x1000
-#define STRING_GFLAGS_NULL 0x2000
+#define STRING_TFLAGS_FOUND 0x01
+
+#define STRING_GFLAGS_REFERENCED 0x01
+#define STRING_GFLAGS_HEXADECIMAL 0x02
+#define STRING_GFLAGS_NO_CASE 0x04
+#define STRING_GFLAGS_ASCII 0x08
+#define STRING_GFLAGS_WIDE 0x10
+#define STRING_GFLAGS_REGEXP 0x20
+#define STRING_GFLAGS_FAST_HEX_REGEXP 0x40
+#define STRING_GFLAGS_FULL_WORD 0x80
+#define STRING_GFLAGS_ANONYMOUS 0x100
+#define STRING_GFLAGS_SINGLE_MATCH 0x200
+#define STRING_GFLAGS_LITERAL 0x400
+#define STRING_GFLAGS_START_ANCHORED 0x800
+#define STRING_GFLAGS_END_ANCHORED 0x1000
+#define STRING_GFLAGS_FITS_IN_ATOM 0x2000
+#define STRING_GFLAGS_NULL 0x4000
#define STRING_IS_HEX(x) \
(((x)->g_flags) & STRING_GFLAGS_HEXADECIMAL)
@@ -199,6 +200,9 @@ typedef pthread_mutex_t mutex_t;
#define STRING_IS_LITERAL(x) \
(((x)->g_flags) & STRING_GFLAGS_LITERAL)
+#define STRING_IS_FAST_HEX_REGEXP(x) \
+ (((x)->g_flags) & STRING_GFLAGS_FAST_HEX_REGEXP)
+
#define STRING_IS_START_ANCHORED(x) \
(((x)->g_flags) & STRING_GFLAGS_START_ANCHORED)
diff --git a/yara.c b/yara.c
index 31269e3..e322a56 100644
--- a/yara.c
+++ b/yara.c
@@ -129,7 +129,7 @@ EXTERNAL* externals_list = NULL;
// file_queue is size-limited queue stored as a circular array, files are
// removed from queue_head position and new files are added at queue_tail
// position. The array has room for one extra element to avoid queue_head
-// being equal to queue_tail in a full queue. The only situation where
+// being equal to queue_tail in a full queue. The only situation where
// queue_head == queue_tail is when queue is empty.
QUEUED_FILE file_queue[MAX_QUEUED_FILES + 1];
@@ -301,7 +301,7 @@ void scan_dir(
{
file_queue_put(full_path);
}
- else if(recursive &&
+ else if(recursive &&
S_ISDIR(st.st_mode) &&
!S_ISLNK(st.st_mode) &&
de->d_name[0] != '.')
@@ -529,7 +529,7 @@ int handle_message(int message, RULE* rule, void* data)
while (match != NULL)
{
- printf("0x%zx:%s: ", match->first_offset, string->identifier);
+ printf("0x%llx:%s: ", match->first_offset, string->identifier);
if (STRING_IS_HEX(string))
{
@@ -539,7 +539,7 @@ int handle_message(int message, RULE* rule, void* data)
{
print_string(match->data, match->length);
}
-
+
match = match->next;
}
}
@@ -585,7 +585,7 @@ void* scanning_thread(void* param)
file_path = file_queue_get();
- while (file_path != NULL)
+ while (file_path != NULL)
{
result = yr_rules_scan_file(
rules,
@@ -607,6 +607,8 @@ void* scanning_thread(void* param)
file_path = file_queue_get();
}
+ yr_re_finalize_thread();
+
return 0;
}
@@ -979,9 +981,9 @@ int main(
print_scanning_error(result);
}
else if (is_directory(argv[argc - 1]))
- {
+ {
file_queue_init();
-
+
for (i = 0; i < threads; i++)
{
if (create_thread(&thread[i], scanning_thread, (void*) rules) != 0)
@@ -1005,9 +1007,9 @@ int main(
else
{
-
+
start = clock();
-
+
result = yr_rules_scan_file(
rules,
argv[argc - 1],
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list