[Forensics-changes] [yara] 229/415: Implement fast matching algorithm for hex strings and some other improvements

Hilko Bengen bengen at moszumanska.debian.org
Thu Apr 3 05:43:09 UTC 2014


This is an automated email from the git hooks/post-receive script.

bengen pushed a commit to branch debian
in repository yara.

commit c6a39aa9bd827b04c2a285dc57ec171f49b450f9
Author: Victor M. Alvarez <plusvic at gmail.com>
Date:   Wed Nov 20 14:07:46 2013 +0000

    Implement fast matching algorithm for hex strings and some other improvements
---
 libyara/Makefile.am   |   2 +-
 libyara/atoms.c       |   2 +-
 libyara/atoms.h       |   2 +-
 libyara/hex_grammar.c |  44 +++++++++-------
 libyara/hex_grammar.h |   2 +-
 libyara/hex_grammar.y |   6 +++
 libyara/hex_lexer.c   |  53 +++++++++----------
 libyara/hex_lexer.l   |  21 ++++----
 libyara/parser.c      |  16 +++---
 libyara/re.c          |   2 +
 libyara/re.h          |  20 ++++----
 libyara/rules.c       | 139 ++++++++++++++++++++++++++++++++++++++++++++++++--
 libyara/yara.h        |  36 +++++++------
 yara.c                |  20 ++++----
 14 files changed, 258 insertions(+), 107 deletions(-)

diff --git a/libyara/Makefile.am b/libyara/Makefile.am
index b345962..09eefbc 100644
--- a/libyara/Makefile.am
+++ b/libyara/Makefile.am
@@ -1,6 +1,6 @@
 AM_YFLAGS=-d
 
-AM_CFLAGS=-g -O0 -std=gnu99
+AM_CFLAGS=-g -O4 -std=gnu99
 
 ACLOCAL_AMFLAGS=-I m4
 
diff --git a/libyara/atoms.c b/libyara/atoms.c
index 03b6882..f474cfc 100644
--- a/libyara/atoms.c
+++ b/libyara/atoms.c
@@ -1034,7 +1034,7 @@ int yr_atoms_extract_from_re(
 //
 
 int yr_atoms_extract_from_string(
-    char* string,
+    uint8_t* string,
     int string_length,
     int flags,
     ATOM_LIST_ITEM** atoms)
diff --git a/libyara/atoms.h b/libyara/atoms.h
index f70af65..b46db85 100644
--- a/libyara/atoms.h
+++ b/libyara/atoms.h
@@ -59,7 +59,7 @@ int yr_atoms_extract_from_re(
     ATOM_LIST_ITEM** atoms);
 
 int yr_atoms_extract_from_string(
-    char* string,
+    uint8_t* string,
     int string_length,
     int flags,
     ATOM_LIST_ITEM** atoms);
diff --git a/libyara/hex_grammar.c b/libyara/hex_grammar.c
index 0b408ca..bba5dc6 100644
--- a/libyara/hex_grammar.c
+++ b/libyara/hex_grammar.c
@@ -105,6 +105,9 @@
 #define mark_as_not_literal() \
     ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
 
+#define mark_as_not_fast_hex_regexp() \
+    ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_FAST_HEX_REGEXP
+
 #if YYDEBUG
 yydebug = 1;
 #endif
@@ -139,13 +142,13 @@ yydebug = 1;
 
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 typedef union YYSTYPE
-#line 58 "hex_grammar.y"
+#line 61 "hex_grammar.y"
 {
   int integer;
   RE_NODE *re_node;
 }
 /* Line 193 of yacc.c.  */
-#line 149 "hex_grammar.c"
+#line 152 "hex_grammar.c"
 	YYSTYPE;
 # define yystype YYSTYPE /* obsolescent; will be withdrawn */
 # define YYSTYPE_IS_DECLARED 1
@@ -158,7 +161,7 @@ typedef union YYSTYPE
 
 
 /* Line 216 of yacc.c.  */
-#line 162 "hex_grammar.c"
+#line 165 "hex_grammar.c"
 
 #ifdef short
 # undef short
@@ -444,8 +447,8 @@ static const yytype_int8 yyrhs[] =
 /* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
 static const yytype_uint8 yyrline[] =
 {
-       0,    71,    71,    79,    83,    91,    95,    99,   107,   120,
-     144,   148,   157,   180
+       0,    74,    74,    82,    86,    94,    98,   102,   110,   123,
+     147,   151,   162,   186
 };
 #endif
 
@@ -1369,7 +1372,7 @@ yyreduce:
   switch (yyn)
     {
         case 2:
-#line 72 "hex_grammar.y"
+#line 75 "hex_grammar.y"
     {
                 RE* re = yyget_extra(yyscanner);
                 re->root_node = (yyvsp[(2) - (3)].re_node);
@@ -1377,14 +1380,14 @@ yyreduce:
     break;
 
   case 3:
-#line 80 "hex_grammar.y"
+#line 83 "hex_grammar.y"
     {
             (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
          }
     break;
 
   case 4:
-#line 84 "hex_grammar.y"
+#line 87 "hex_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1392,21 +1395,21 @@ yyreduce:
     break;
 
   case 5:
-#line 92 "hex_grammar.y"
+#line 95 "hex_grammar.y"
     {
           (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
         }
     break;
 
   case 6:
-#line 96 "hex_grammar.y"
+#line 99 "hex_grammar.y"
     {
           (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
         }
     break;
 
   case 7:
-#line 100 "hex_grammar.y"
+#line 103 "hex_grammar.y"
     {
           mark_as_not_literal();
           (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
@@ -1414,7 +1417,7 @@ yyreduce:
     break;
 
   case 8:
-#line 108 "hex_grammar.y"
+#line 111 "hex_grammar.y"
     {
           RE_NODE* re_any;
 
@@ -1430,7 +1433,7 @@ yyreduce:
     break;
 
   case 9:
-#line 121 "hex_grammar.y"
+#line 124 "hex_grammar.y"
     {
           RE_NODE* re_any;
 
@@ -1454,16 +1457,18 @@ yyreduce:
     break;
 
   case 10:
-#line 145 "hex_grammar.y"
+#line 148 "hex_grammar.y"
     {
                   (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
                }
     break;
 
   case 11:
-#line 149 "hex_grammar.y"
+#line 152 "hex_grammar.y"
     {
                   mark_as_not_literal();
+                  mark_as_not_fast_hex_regexp();
+
                   (yyval.re_node) = yr_re_node_create(RE_NODE_ALT, (yyvsp[(1) - (3)].re_node), (yyvsp[(3) - (3)].re_node));
 
                   ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1471,7 +1476,7 @@ yyreduce:
     break;
 
   case 12:
-#line 158 "hex_grammar.y"
+#line 163 "hex_grammar.y"
     {
           RE* re = yyget_extra(yyscanner);
 
@@ -1484,6 +1489,7 @@ yyreduce:
           if (re->literal_string_len == re->literal_string_max)
           {
             re->literal_string_max *= 2;
+
             re->literal_string = yr_realloc(
                 re->literal_string,
                 re->literal_string_max);
@@ -1497,7 +1503,7 @@ yyreduce:
     break;
 
   case 13:
-#line 181 "hex_grammar.y"
+#line 187 "hex_grammar.y"
     {
           uint8_t mask = (yyvsp[(1) - (1)].integer) >> 8;
 
@@ -1523,7 +1529,7 @@ yyreduce:
 
 
 /* Line 1267 of yacc.c.  */
-#line 1527 "hex_grammar.c"
+#line 1533 "hex_grammar.c"
       default: break;
     }
   YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1737,7 +1743,7 @@ yyreturn:
 }
 
 
-#line 204 "hex_grammar.y"
+#line 210 "hex_grammar.y"
 
 
 
diff --git a/libyara/hex_grammar.h b/libyara/hex_grammar.h
index 06ad3f0..6cd377e 100644
--- a/libyara/hex_grammar.h
+++ b/libyara/hex_grammar.h
@@ -54,7 +54,7 @@
 
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 typedef union YYSTYPE
-#line 58 "hex_grammar.y"
+#line 61 "hex_grammar.y"
 {
   int integer;
   RE_NODE *re_node;
diff --git a/libyara/hex_grammar.y b/libyara/hex_grammar.y
index 4d46396..a67fde2 100644
--- a/libyara/hex_grammar.y
+++ b/libyara/hex_grammar.y
@@ -30,6 +30,9 @@ limitations under the License.
 #define mark_as_not_literal() \
     ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
 
+#define mark_as_not_fast_hex_regexp() \
+    ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_FAST_HEX_REGEXP
+
 #if YYDEBUG
 yydebug = 1;
 #endif
@@ -148,6 +151,8 @@ alternatives : tokens
              | alternatives '|' tokens
                {
                   mark_as_not_literal();
+                  mark_as_not_fast_hex_regexp();
+
                   $$ = yr_re_node_create(RE_NODE_ALT, $1, $3);
 
                   ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -167,6 +172,7 @@ byte  : _BYTE_
           if (re->literal_string_len == re->literal_string_max)
           {
             re->literal_string_max *= 2;
+
             re->literal_string = yr_realloc(
                 re->literal_string,
                 re->literal_string_max);
diff --git a/libyara/hex_lexer.c b/libyara/hex_lexer.c
index 0efa4c8..e8f871b 100644
--- a/libyara/hex_lexer.c
+++ b/libyara/hex_lexer.c
@@ -489,11 +489,6 @@ limitations under the License.
 #define snprintf _snprintf
 #endif
 
-
-
-
-
-
 #define ERROR_IF(x, error) \
     if (x) \
     { \
@@ -504,7 +499,7 @@ limitations under the License.
 
 #define YY_NO_UNISTD_H 1
 
-#line 508 "hex_lexer.c"
+#line 503 "hex_lexer.c"
 
 #define INITIAL 0
 #define range 1
@@ -738,11 +733,11 @@ YY_DECL
 	register int yy_act;
     struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
 
-#line 66 "hex_lexer.l"
+#line 61 "hex_lexer.l"
 
 
 
-#line 746 "hex_lexer.c"
+#line 741 "hex_lexer.c"
 
     yylval = yylval_param;
 
@@ -841,7 +836,7 @@ do_action:	/* This label is used only to access EOF actions. */
 
 case 1:
 YY_RULE_SETUP
-#line 69 "hex_lexer.l"
+#line 64 "hex_lexer.l"
 {
 
   yylval->integer = xtoi(yytext);
@@ -850,7 +845,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 2:
 YY_RULE_SETUP
-#line 75 "hex_lexer.l"
+#line 70 "hex_lexer.l"
 {
 
   yytext[1] = '0'; // replace ? by 0
@@ -860,7 +855,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 3:
 YY_RULE_SETUP
-#line 82 "hex_lexer.l"
+#line 77 "hex_lexer.l"
 {
 
   yytext[0] = '0'; // replace ? by 0
@@ -870,7 +865,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 4:
 YY_RULE_SETUP
-#line 89 "hex_lexer.l"
+#line 84 "hex_lexer.l"
 {
 
   yylval->integer = 0x0000;
@@ -879,7 +874,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 5:
 YY_RULE_SETUP
-#line 95 "hex_lexer.l"
+#line 90 "hex_lexer.l"
 {
 
   BEGIN(range);
@@ -888,14 +883,14 @@ YY_RULE_SETUP
 	YY_BREAK
 case 6:
 YY_RULE_SETUP
-#line 101 "hex_lexer.l"
+#line 96 "hex_lexer.l"
 {
   return yytext[0];
 }
 	YY_BREAK
 case 7:
 YY_RULE_SETUP
-#line 105 "hex_lexer.l"
+#line 100 "hex_lexer.l"
 {
 
   yylval->integer = atoi(yytext);
@@ -911,7 +906,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 8:
 YY_RULE_SETUP
-#line 118 "hex_lexer.l"
+#line 113 "hex_lexer.l"
 {
 
   BEGIN(INITIAL);
@@ -921,12 +916,12 @@ YY_RULE_SETUP
 case 9:
 /* rule 9 can match eol */
 YY_RULE_SETUP
-#line 125 "hex_lexer.l"
+#line 120 "hex_lexer.l"
 // skip whitespace
 	YY_BREAK
 case 10:
 YY_RULE_SETUP
-#line 128 "hex_lexer.l"
+#line 123 "hex_lexer.l"
 {
 
   if (yytext[0] >= 32 && yytext[0] < 127)
@@ -942,10 +937,10 @@ YY_RULE_SETUP
 	YY_BREAK
 case 11:
 YY_RULE_SETUP
-#line 141 "hex_lexer.l"
+#line 136 "hex_lexer.l"
 ECHO;
 	YY_BREAK
-#line 949 "hex_lexer.c"
+#line 944 "hex_lexer.c"
 case YY_STATE_EOF(INITIAL):
 case YY_STATE_EOF(range):
 	yyterminate();
@@ -2123,7 +2118,7 @@ void hex_yyfree (void * ptr , yyscan_t yyscanner)
 
 #define YYTABLES_NAME "yytables"
 
-#line 141 "hex_lexer.l"
+#line 136 "hex_lexer.l"
 
 
 
@@ -2149,13 +2144,19 @@ int yr_parse_hex_string(
 
   FAIL_ON_ERROR(yr_re_create(re));
 
-  // The RE_FLAGS_LITERAL_STRING flag indicates that the
-  // regular expression is just a literal string and it can
-  // be matched by doing a simple string comparison, without
-  // executing any regular expression code. We initially set
-  // this flag which is unset later during parsing if necessary.
+  // The RE_FLAGS_LITERAL_STRING flag indicates that the regular expression
+  // is just a literal string and it can be matched by doing a simple string
+  // comparison, without executing any regular expression code.
+  //
+  // The RE_FLAGS_FAST_HEX_REGEXP flag indicates a regular expression derived
+  // from a hex string that can be matched by a faster algorithm. These regular
+  // expressions come from hex strings not containing alternatives
+  // (like in 01 02 | 03 04).
+  //
+  // These flags are unset later during parsing if necessary.
 
   (*re)->flags |= RE_FLAGS_LITERAL_STRING;
+  (*re)->flags |= RE_FLAGS_FAST_HEX_REGEXP;
 
   hex_yylex_init(&yyscanner);
   hex_yyset_extra(*re,yyscanner);
diff --git a/libyara/hex_lexer.l b/libyara/hex_lexer.l
index 43476ed..df4d5a0 100644
--- a/libyara/hex_lexer.l
+++ b/libyara/hex_lexer.l
@@ -31,11 +31,6 @@ limitations under the License.
 #define snprintf _snprintf
 #endif
 
-
-
-
-
-
 #define ERROR_IF(x, error) \
     if (x) \
     { \
@@ -162,13 +157,19 @@ int yr_parse_hex_string(
 
   FAIL_ON_ERROR(yr_re_create(re));
 
-  // The RE_FLAGS_LITERAL_STRING flag indicates that the
-  // regular expression is just a literal string and it can
-  // be matched by doing a simple string comparison, without
-  // executing any regular expression code. We initially set
-  // this flag which is unset later during parsing if necessary.
+  // The RE_FLAGS_LITERAL_STRING flag indicates that the regular expression
+  // is just a literal string and it can be matched by doing a simple string
+  // comparison, without executing any regular expression code.
+  //
+  // The RE_FLAGS_FAST_HEX_REGEXP flag indicates a regular expression derived
+  // from a hex string that can be matched by a faster algorithm. These regular
+  // expressions come from hex strings not containing alternatives
+  // (like in 01 02 | 03 04).
+  //
+  // These flags are unset later during parsing if necessary.
 
   (*re)->flags |= RE_FLAGS_LITERAL_STRING;
+  (*re)->flags |= RE_FLAGS_FAST_HEX_REGEXP;
 
   yylex_init(&yyscanner);
   yyset_extra(*re, yyscanner);
diff --git a/libyara/parser.c b/libyara/parser.c
index a67a158..2984fb4 100644
--- a/libyara/parser.c
+++ b/libyara/parser.c
@@ -217,6 +217,7 @@ STRING* yr_parser_reduce_string_declaration(
   RE* re = NULL;
 
   uint8_t* literal_string;
+
   int literal_string_len;
   int max_string_len;
 
@@ -289,17 +290,15 @@ STRING* yr_parser_reduce_string_declaration(
       goto _exit;
     }
 
-    //
-    //yr_re_print(re);
-    //printf("\n");
-    //
-
     if (re->flags & RE_FLAGS_START_ANCHORED)
       string->g_flags |= STRING_GFLAGS_START_ANCHORED;
 
     if (re->flags & RE_FLAGS_END_ANCHORED)
       string->g_flags |= STRING_GFLAGS_END_ANCHORED;
 
+    if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
+      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;
+
     if (re->flags & RE_FLAGS_LITERAL_STRING)
     {
       string->g_flags |= STRING_GFLAGS_LITERAL;
@@ -307,10 +306,7 @@ STRING* yr_parser_reduce_string_declaration(
       literal_string_len = re->literal_string_len;
 
       compiler->last_result = yr_atoms_extract_from_string(
-          re->literal_string,
-          re->literal_string_len,
-          string->g_flags,
-          &atom_list);
+          literal_string, literal_string_len, string->g_flags, &atom_list);
     }
     else
     {
@@ -334,7 +330,7 @@ STRING* yr_parser_reduce_string_declaration(
     literal_string_len = str->length;
 
     compiler->last_result  = yr_atoms_extract_from_string(
-        str->c_string, str->length, string->g_flags, &atom_list);
+        literal_string, literal_string_len, string->g_flags, &atom_list);
   }
 
   if (compiler->last_result != ERROR_SUCCESS)
diff --git a/libyara/re.c b/libyara/re.c
index cb10034..ad9335f 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -196,6 +196,8 @@ RE_NODE* yr_re_node_create(
     result->left = left;
     result->right = right;
     result->greedy = TRUE;
+    result->forward_code = NULL;
+    result->backward_code = NULL;
   }
 
   return result;
diff --git a/libyara/re.h b/libyara/re.h
index 72d2519..5239d76 100644
--- a/libyara/re.h
+++ b/libyara/re.h
@@ -57,14 +57,16 @@ limitations under the License.
 #define RE_OPCODE_JUMP              0xB5
 
 
-#define RE_FLAGS_START_ANCHORED     0x01
-#define RE_FLAGS_END_ANCHORED       0x02
-#define RE_FLAGS_LITERAL_STRING     0x04
-#define RE_FLAGS_BACKWARDS          0x08
-#define RE_FLAGS_EXHAUSTIVE         0x10
-#define RE_FLAGS_WIDE               0x20
-#define RE_FLAGS_NO_CASE            0x40
-#define RE_FLAGS_SCAN               0x80
+#define RE_FLAGS_START_ANCHORED           0x01
+#define RE_FLAGS_END_ANCHORED             0x02
+#define RE_FLAGS_LITERAL_STRING           0x04
+#define RE_FLAGS_FAST_HEX_REGEXP          0x08
+#define RE_FLAGS_BACKWARDS                0x10
+#define RE_FLAGS_EXHAUSTIVE               0x20
+#define RE_FLAGS_WIDE                     0x40
+#define RE_FLAGS_NO_CASE                  0x80
+#define RE_FLAGS_SCAN                     0x100
+
 
 typedef struct RE RE;
 typedef struct RE_NODE RE_NODE;
@@ -110,10 +112,8 @@ struct RE {
   int error_code;
 
   int8_t* literal_string;
-
   int literal_string_len;
   int literal_string_max;
-
 };
 
 
diff --git a/libyara/rules.c b/libyara/rules.c
index aebba1c..1e100b6 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -133,6 +133,123 @@ inline int _yr_scan_wicompare(
 }
 
 
+int _yr_scan_fast_hex_re_exec(
+    uint8_t* code,
+    uint8_t* input,
+    size_t input_size,
+    int flags,
+    RE_MATCH_CALLBACK_FUNC callback,
+    void* callback_args)
+{
+  uint8_t* code_stack[100];
+  uint8_t* input_stack[100];
+  int matches_stack[100];
+
+  int sp = 0;
+
+  uint8_t* ip = code;
+  uint8_t* current_input = input;
+  uint8_t mask;
+  uint8_t value;
+
+  int i;
+  int matches;
+  int offset;
+  int stop;
+  int increment;
+
+  increment = flags & RE_FLAGS_BACKWARDS ? -1 : 1;
+
+  code_stack[sp] = code;
+  input_stack[sp] = input;
+  matches_stack[sp] = 0;
+  sp++;
+
+  while (sp > 0)
+  {
+    sp--;
+    ip = code_stack[sp];
+    current_input = input_stack[sp];
+    matches = matches_stack[sp];
+    stop = FALSE;
+
+    while(!stop)
+    {
+      switch(*ip)
+      {
+        case RE_OPCODE_LITERAL:
+          if (*current_input == *(ip + 1))
+          {
+            matches++;
+            current_input += increment;
+            ip += 2;
+          }
+          else
+          {
+            stop = TRUE;
+          }
+          break;
+
+        case RE_OPCODE_MASKED_LITERAL:
+          value = *(int16_t*)(ip + 1) & 0xFF;
+          mask = *(int16_t*)(ip + 1) >> 8;
+          if ((*current_input & mask) == value)
+          {
+            matches++;
+            current_input += increment;
+            ip += 3;
+          }
+          else
+          {
+            stop = TRUE;
+          }
+          break;
+
+        case RE_OPCODE_ANY:
+          matches++;
+          current_input += increment;
+          ip += 1;
+          break;
+
+        case RE_OPCODE_PUSH:
+          for (i = *(uint16_t*)(ip + 1); i > 0; i--)
+          {
+            offset = flags & RE_FLAGS_BACKWARDS ? -i : i;
+            code_stack[sp] = ip + 11;
+            input_stack[sp] = current_input + offset;
+            matches_stack[sp] = matches + i;
+            sp++;
+          }
+
+          ip += 11;
+          break;
+
+        default:
+          assert(FALSE);
+      }
+
+      if (*ip == RE_OPCODE_MATCH)
+      {
+        if (flags & RE_FLAGS_EXHAUSTIVE)
+        {
+          callback(
+            flags & RE_FLAGS_BACKWARDS ? current_input + 1 : input,
+            matches,
+            flags,
+            callback_args);
+          stop = TRUE;
+        }
+        else
+        {
+          return matches;
+        }
+      }
+    }
+  }
+
+  return -1;
+}
+
 void match_callback(
     uint8_t* match_data,
     int match_length,
@@ -264,6 +381,16 @@ void match_callback(
 }
 
 
+
+typedef int (*RE_EXEC_FUNC)(
+    uint8_t* code,
+    uint8_t* input,
+    size_t input_size,
+    int flags,
+    RE_MATCH_CALLBACK_FUNC callback,
+    void* callback_args);
+
+
 int _yr_scan_verify_re_match(
     AC_MATCH* ac_match,
     uint8_t* data,
@@ -272,10 +399,16 @@ int _yr_scan_verify_re_match(
     ARENA* matches_arena)
 {
   CALLBACK_ARGS callback_args;
+  RE_EXEC_FUNC exec;
 
   int forward_matches = -1;
   int flags = 0;
 
+  if (STRING_IS_FAST_HEX_REGEXP(ac_match->string))
+    exec = _yr_scan_fast_hex_re_exec;
+  else
+    exec = yr_re_exec;
+
   if (STRING_IS_START_ANCHORED(ac_match->string))
     flags |= RE_FLAGS_START_ANCHORED;
 
@@ -287,7 +420,7 @@ int _yr_scan_verify_re_match(
 
   if (STRING_IS_ASCII(ac_match->string))
   {
-    forward_matches = yr_re_exec(
+    forward_matches = exec(
         ac_match->forward_code,
         data + offset,
         data_size - offset,
@@ -300,7 +433,7 @@ int _yr_scan_verify_re_match(
       forward_matches < 0)
   {
     flags |= RE_FLAGS_WIDE;
-    forward_matches = yr_re_exec(
+    forward_matches = exec(
         ac_match->forward_code,
         data + offset,
         data_size - offset,
@@ -325,7 +458,7 @@ int _yr_scan_verify_re_match(
 
   if (ac_match->backward_code != NULL)
   {
-    yr_re_exec(
+    exec(
         ac_match->backward_code,
         data + offset,
         offset + 1,
diff --git a/libyara/yara.h b/libyara/yara.h
index 1afb520..fb16074 100644
--- a/libyara/yara.h
+++ b/libyara/yara.h
@@ -152,22 +152,23 @@ typedef pthread_mutex_t mutex_t;
     ((x) != NULL ? (x)->type == EXTERNAL_VARIABLE_TYPE_NULL : TRUE)
 
 
-#define STRING_TFLAGS_FOUND          0x01
-
-#define STRING_GFLAGS_REFERENCED     0x01
-#define STRING_GFLAGS_HEXADECIMAL    0x02
-#define STRING_GFLAGS_NO_CASE        0x04
-#define STRING_GFLAGS_ASCII          0x08
-#define STRING_GFLAGS_WIDE           0x10
-#define STRING_GFLAGS_REGEXP         0x20
-#define STRING_GFLAGS_FULL_WORD      0x40
-#define STRING_GFLAGS_ANONYMOUS      0x80
-#define STRING_GFLAGS_SINGLE_MATCH   0x100
-#define STRING_GFLAGS_LITERAL        0x200
-#define STRING_GFLAGS_START_ANCHORED 0x400
-#define STRING_GFLAGS_END_ANCHORED   0x800
-#define STRING_GFLAGS_FITS_IN_ATOM   0x1000
-#define STRING_GFLAGS_NULL           0x2000
+#define STRING_TFLAGS_FOUND             0x01
+
+#define STRING_GFLAGS_REFERENCED        0x01
+#define STRING_GFLAGS_HEXADECIMAL       0x02
+#define STRING_GFLAGS_NO_CASE           0x04
+#define STRING_GFLAGS_ASCII             0x08
+#define STRING_GFLAGS_WIDE              0x10
+#define STRING_GFLAGS_REGEXP            0x20
+#define STRING_GFLAGS_FAST_HEX_REGEXP   0x40
+#define STRING_GFLAGS_FULL_WORD         0x80
+#define STRING_GFLAGS_ANONYMOUS         0x100
+#define STRING_GFLAGS_SINGLE_MATCH      0x200
+#define STRING_GFLAGS_LITERAL           0x400
+#define STRING_GFLAGS_START_ANCHORED    0x800
+#define STRING_GFLAGS_END_ANCHORED      0x1000
+#define STRING_GFLAGS_FITS_IN_ATOM      0x2000
+#define STRING_GFLAGS_NULL              0x4000
 
 #define STRING_IS_HEX(x) \
     (((x)->g_flags) & STRING_GFLAGS_HEXADECIMAL)
@@ -199,6 +200,9 @@ typedef pthread_mutex_t mutex_t;
 #define STRING_IS_LITERAL(x) \
     (((x)->g_flags) & STRING_GFLAGS_LITERAL)
 
+#define STRING_IS_FAST_HEX_REGEXP(x) \
+    (((x)->g_flags) & STRING_GFLAGS_FAST_HEX_REGEXP)
+
 #define STRING_IS_START_ANCHORED(x) \
     (((x)->g_flags) & STRING_GFLAGS_START_ANCHORED)
 
diff --git a/yara.c b/yara.c
index 31269e3..e322a56 100644
--- a/yara.c
+++ b/yara.c
@@ -129,7 +129,7 @@ EXTERNAL* externals_list = NULL;
 // file_queue is size-limited queue stored as a circular array, files are
 // removed from queue_head position and new files are added at queue_tail
 // position. The array has room for one extra element to avoid queue_head
-// being equal to queue_tail in a full queue. The only situation where 
+// being equal to queue_tail in a full queue. The only situation where
 // queue_head == queue_tail is when queue is empty.
 
 QUEUED_FILE file_queue[MAX_QUEUED_FILES + 1];
@@ -301,7 +301,7 @@ void scan_dir(
         {
           file_queue_put(full_path);
         }
-        else if(recursive &&  
+        else if(recursive &&
                 S_ISDIR(st.st_mode) &&
                 !S_ISLNK(st.st_mode) &&
                 de->d_name[0] != '.')
@@ -529,7 +529,7 @@ int handle_message(int message, RULE* rule, void* data)
 
           while (match != NULL)
           {
-            printf("0x%zx:%s: ", match->first_offset, string->identifier);
+            printf("0x%llx:%s: ", match->first_offset, string->identifier);
 
             if (STRING_IS_HEX(string))
             {
@@ -539,7 +539,7 @@ int handle_message(int message, RULE* rule, void* data)
             {
               print_string(match->data, match->length);
             }
- 
+
             match = match->next;
           }
         }
@@ -585,7 +585,7 @@ void* scanning_thread(void* param)
 
   file_path = file_queue_get();
 
-  while (file_path != NULL) 
+  while (file_path != NULL)
   {
     result = yr_rules_scan_file(
         rules,
@@ -607,6 +607,8 @@ void* scanning_thread(void* param)
     file_path = file_queue_get();
   }
 
+  yr_re_finalize_thread();
+
   return 0;
 }
 
@@ -979,9 +981,9 @@ int main(
       print_scanning_error(result);
   }
   else if (is_directory(argv[argc - 1]))
-  {   
+  {
     file_queue_init();
-    
+
     for (i = 0; i < threads; i++)
     {
       if (create_thread(&thread[i], scanning_thread, (void*) rules) != 0)
@@ -1005,9 +1007,9 @@ int main(
   else
   {
 
-     
+
     start = clock();
-     
+
     result = yr_rules_scan_file(
         rules,
         argv[argc - 1],

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git



More information about the forensics-changes mailing list