[Forensics-changes] [yara] 321/415: Use string chaining for every jump over a given threshold

Hilko Bengen bengen at moszumanska.debian.org
Thu Apr 3 05:43:19 UTC 2014


This is an automated email from the git hooks/post-receive script.

bengen pushed a commit to branch debian
in repository yara.

commit 521d4046a953defd4acef4d095a393da13084488
Author: Victor Manuel Alvarez <vmalvarez at virustotal.com>
Date:   Fri Dec 20 13:25:12 2013 +0100

    Use string chaining for every jump over a given threshold
---
 libyara/hex_grammar.c | 210 +++++++++++++++++++++++-------------
 libyara/hex_grammar.h |   2 +-
 libyara/hex_grammar.y |  61 ++++++++++-
 libyara/hex_lexer.c   |  26 ++---
 libyara/hex_lexer.l   |   7 --
 libyara/lexer.c       |  11 +-
 libyara/lexer.l       |   2 +-
 libyara/re.c          |  16 ++-
 libyara/re.h          |   5 +-
 libyara/rules.c       | 292 ++++++++++++++++++++++++++++++++++++++++----------
 libyara/yara.h        |  13 ++-
 11 files changed, 469 insertions(+), 176 deletions(-)

diff --git a/libyara/hex_grammar.c b/libyara/hex_grammar.c
index d57ab40..4850d5d 100644
--- a/libyara/hex_grammar.c
+++ b/libyara/hex_grammar.c
@@ -105,6 +105,9 @@
 #include <dmalloc.h>
 #endif
 
+#define STR_EXPAND(tok) #tok
+#define STR(tok) STR_EXPAND(tok)
+
 #define YYERROR_VERBOSE
 
 #define YYDEBUG 0
@@ -152,13 +155,13 @@ yydebug = 1;
 
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 typedef union YYSTYPE
-#line 71 "hex_grammar.y"
+#line 74 "hex_grammar.y"
 {
   int integer;
   RE_NODE *re_node;
 }
 /* Line 193 of yacc.c.  */
-#line 162 "hex_grammar.c"
+#line 165 "hex_grammar.c"
 	YYSTYPE;
 # define yystype YYSTYPE /* obsolescent; will be withdrawn */
 # define YYSTYPE_IS_DECLARED 1
@@ -171,7 +174,7 @@ typedef union YYSTYPE
 
 
 /* Line 216 of yacc.c.  */
-#line 175 "hex_grammar.c"
+#line 178 "hex_grammar.c"
 
 #ifdef short
 # undef short
@@ -386,16 +389,16 @@ union yyalloc
 /* YYFINAL -- State number of the termination state.  */
 #define YYFINAL  10
 /* YYLAST -- Last index in YYTABLE.  */
-#define YYLAST   23
+#define YYLAST   25
 
 /* YYNTOKENS -- Number of terminals.  */
-#define YYNTOKENS  15
+#define YYNTOKENS  14
 /* YYNNTS -- Number of nonterminals.  */
 #define YYNNTS  8
 /* YYNRULES -- Number of rules.  */
-#define YYNRULES  15
+#define YYNRULES  16
 /* YYNRULES -- Number of states.  */
-#define YYNSTATES  26
+#define YYNSTATES  25
 
 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX.  */
 #define YYUNDEFTOK  2
@@ -411,7 +414,7 @@ static const yytype_uint8 yytranslate[] =
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       8,     9,     2,     2,     2,    12,    13,     2,     2,     2,
+       8,     9,     2,     2,     2,    12,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
@@ -419,7 +422,7 @@ static const yytype_uint8 yytranslate[] =
        2,    10,     2,    11,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     6,    14,     7,     2,     2,     2,     2,
+       2,     2,     2,     6,    13,     7,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
@@ -442,24 +445,24 @@ static const yytype_uint8 yytranslate[] =
 static const yytype_uint8 yyprhs[] =
 {
        0,     0,     3,     7,     9,    12,    14,    15,    20,    24,
-      26,    30,    33,    35,    39,    41
+      26,    30,    33,    35,    37,    41,    43
 };
 
 /* YYRHS -- A `-1'-separated list of the rules' RHS.  */
 static const yytype_int8 yyrhs[] =
 {
-      16,     0,    -1,     6,    17,     7,    -1,    18,    -1,    17,
-      18,    -1,    22,    -1,    -1,     8,    19,    21,     9,    -1,
-      10,    20,    11,    -1,     5,    -1,     5,    12,     5,    -1,
-      13,    13,    -1,    17,    -1,    21,    14,    17,    -1,     3,
-      -1,     4,    -1
+      15,     0,    -1,     6,    16,     7,    -1,    17,    -1,    16,
+      17,    -1,    21,    -1,    -1,     8,    18,    20,     9,    -1,
+      10,    19,    11,    -1,     5,    -1,     5,    12,     5,    -1,
+       5,    12,    -1,    12,    -1,    16,    -1,    20,    13,    16,
+      -1,     3,    -1,     4,    -1
 };
 
 /* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
-static const yytype_uint8 yyrline[] =
+static const yytype_uint16 yyrline[] =
 {
-       0,    90,    90,    98,   102,   113,   118,   117,   126,   134,
-     149,   172,   198,   202,   214,   222
+       0,    93,    93,   101,   105,   116,   121,   120,   129,   137,
+     164,   201,   226,   253,   257,   269,   277
 };
 #endif
 
@@ -469,7 +472,7 @@ static const yytype_uint8 yyrline[] =
 static const char *const yytname[] =
 {
   "$end", "error", "$undefined", "_BYTE_", "_MASKED_BYTE_", "_NUMBER_",
-  "'{'", "'}'", "'('", "')'", "'['", "']'", "'-'", "'.'", "'|'", "$accept",
+  "'{'", "'}'", "'('", "')'", "'['", "']'", "'-'", "'|'", "$accept",
   "hex_string", "tokens", "token", "@1", "range", "alternatives", "byte", 0
 };
 #endif
@@ -480,22 +483,22 @@ static const char *const yytname[] =
 static const yytype_uint16 yytoknum[] =
 {
        0,   256,   257,   258,   259,   260,   123,   125,    40,    41,
-      91,    93,    45,    46,   124
+      91,    93,    45,   124
 };
 # endif
 
 /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.  */
 static const yytype_uint8 yyr1[] =
 {
-       0,    15,    16,    17,    17,    18,    19,    18,    18,    20,
-      20,    20,    21,    21,    22,    22
+       0,    14,    15,    16,    16,    17,    18,    17,    17,    19,
+      19,    19,    19,    20,    20,    21,    21
 };
 
 /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.  */
 static const yytype_uint8 yyr2[] =
 {
        0,     2,     3,     1,     2,     1,     0,     4,     3,     1,
-       3,     2,     1,     3,     1,     1
+       3,     2,     1,     1,     3,     1,     1
 };
 
 /* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
@@ -503,9 +506,9 @@ static const yytype_uint8 yyr2[] =
    means the default is an error.  */
 static const yytype_uint8 yydefact[] =
 {
-       0,     0,     0,    14,    15,     6,     0,     0,     3,     5,
-       1,     0,     9,     0,     0,     2,     4,    12,     0,     0,
-      11,     8,     7,     0,    10,    13
+       0,     0,     0,    15,    16,     6,     0,     0,     3,     5,
+       1,     0,     9,    12,     0,     2,     4,    13,     0,    11,
+       8,     7,     0,    10,    14
 };
 
 /* YYDEFGOTO[NTERM-NUM].  */
@@ -519,9 +522,9 @@ static const yytype_int8 yydefgoto[] =
 #define YYPACT_NINF -11
 static const yytype_int8 yypact[] =
 {
-      -4,     4,     5,   -11,   -11,   -11,    -2,    12,   -11,   -11,
-     -11,     4,    -6,     8,     6,   -11,   -11,     4,    -5,    18,
-     -11,   -11,   -11,     4,   -11,     4
+      -2,    10,     5,   -11,   -11,   -11,     3,    -1,   -11,   -11,
+     -11,    10,     4,   -11,     0,   -11,   -11,    10,    12,    14,
+     -11,   -11,    10,   -11,    10
 };
 
 /* YYPGOTO[NTERM-NUM].  */
@@ -537,25 +540,25 @@ static const yytype_int8 yypgoto[] =
 #define YYTABLE_NINF -1
 static const yytype_uint8 yytable[] =
 {
-      16,    17,     1,    12,    22,    10,    19,     3,     4,    23,
-      16,    13,     5,    25,     6,     3,     4,    21,    16,    15,
-       5,    20,     6,    24
+      16,    17,     3,     4,     1,    10,    15,     5,    12,     6,
+      16,    20,    24,     3,     4,    13,    19,    16,     5,    23,
+       6,    21,     0,     0,     0,    22
 };
 
-static const yytype_uint8 yycheck[] =
+static const yytype_int8 yycheck[] =
 {
-       7,    11,     6,     5,     9,     0,    12,     3,     4,    14,
-      17,    13,     8,    23,    10,     3,     4,    11,    25,     7,
-       8,    13,    10,     5
+       7,    11,     3,     4,     6,     0,     7,     8,     5,    10,
+      17,    11,    22,     3,     4,    12,    12,    24,     8,     5,
+      10,     9,    -1,    -1,    -1,    13
 };
 
 /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
    symbol of state STATE-NUM.  */
 static const yytype_uint8 yystos[] =
 {
-       0,     6,    16,     3,     4,     8,    10,    17,    18,    22,
-       0,    19,     5,    13,    20,     7,    18,    17,    21,    12,
-      13,    11,     9,    14,     5,    17
+       0,     6,    15,     3,     4,     8,    10,    16,    17,    21,
+       0,    18,     5,    12,    19,     7,    17,    16,    20,    12,
+      11,     9,    13,     5,    16
 };
 
 #define yyerrok		(yyerrstatus = 0)
@@ -1075,30 +1078,30 @@ yydestruct (yymsg, yytype, yyvaluep, yyscanner, lex_env)
 
   switch (yytype)
     {
-      case 17: /* "tokens" */
-#line 82 "hex_grammar.y"
+      case 16: /* "tokens" */
+#line 85 "hex_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1082 "hex_grammar.c"
+#line 1085 "hex_grammar.c"
 	break;
-      case 18: /* "token" */
-#line 83 "hex_grammar.y"
+      case 17: /* "token" */
+#line 86 "hex_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1087 "hex_grammar.c"
+#line 1090 "hex_grammar.c"
 	break;
-      case 20: /* "range" */
-#line 86 "hex_grammar.y"
+      case 19: /* "range" */
+#line 89 "hex_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1092 "hex_grammar.c"
+#line 1095 "hex_grammar.c"
 	break;
-      case 21: /* "alternatives" */
-#line 85 "hex_grammar.y"
+      case 20: /* "alternatives" */
+#line 88 "hex_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1097 "hex_grammar.c"
+#line 1100 "hex_grammar.c"
 	break;
-      case 22: /* "byte" */
-#line 84 "hex_grammar.y"
+      case 21: /* "byte" */
+#line 87 "hex_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1102 "hex_grammar.c"
+#line 1105 "hex_grammar.c"
 	break;
 
       default:
@@ -1408,7 +1411,7 @@ yyreduce:
   switch (yyn)
     {
         case 2:
-#line 91 "hex_grammar.y"
+#line 94 "hex_grammar.y"
     {
                 RE* re = yyget_extra(yyscanner);
                 re->root_node = (yyvsp[(2) - (3)].re_node);
@@ -1416,14 +1419,14 @@ yyreduce:
     break;
 
   case 3:
-#line 99 "hex_grammar.y"
+#line 102 "hex_grammar.y"
     {
             (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
          }
     break;
 
   case 4:
-#line 103 "hex_grammar.y"
+#line 106 "hex_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
 
@@ -1434,21 +1437,21 @@ yyreduce:
     break;
 
   case 5:
-#line 114 "hex_grammar.y"
+#line 117 "hex_grammar.y"
     {
           (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
         }
     break;
 
   case 6:
-#line 118 "hex_grammar.y"
+#line 121 "hex_grammar.y"
     {
           lex_env->inside_or++;
         }
     break;
 
   case 7:
-#line 122 "hex_grammar.y"
+#line 125 "hex_grammar.y"
     {
           (yyval.re_node) = (yyvsp[(3) - (4)].re_node);
           lex_env->inside_or--;
@@ -1456,7 +1459,7 @@ yyreduce:
     break;
 
   case 8:
-#line 127 "hex_grammar.y"
+#line 130 "hex_grammar.y"
     {
           (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
           (yyval.re_node)->greedy = FALSE;
@@ -1464,10 +1467,22 @@ yyreduce:
     break;
 
   case 9:
-#line 135 "hex_grammar.y"
+#line 138 "hex_grammar.y"
     {
           RE_NODE* re_any;
 
+          if (lex_env->inside_or && (yyvsp[(1) - (1)].integer) > STRING_CHAINING_THRESHOLD)
+          {
+            RE* re = yyget_extra(yyscanner);
+            re->error_code = ERROR_INVALID_HEX_STRING;
+            re->error_message = yr_strdup(
+                "jumps over "
+                STR(STRING_CHAINING_THRESHOLD)
+                " not allowed inside alternation (|)");
+
+            YYABORT;
+          }
+
           re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
 
           ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1482,15 +1497,29 @@ yyreduce:
     break;
 
   case 10:
-#line 150 "hex_grammar.y"
+#line 165 "hex_grammar.y"
     {
           RE_NODE* re_any;
 
+          if (lex_env->inside_or &&
+              ((yyvsp[(1) - (3)].integer) > STRING_CHAINING_THRESHOLD ||
+               (yyvsp[(3) - (3)].integer) > STRING_CHAINING_THRESHOLD) )
+          {
+            RE* re = yyget_extra(yyscanner);
+            re->error_code = ERROR_INVALID_HEX_STRING;
+            re->error_message = yr_strdup(
+                "jumps over "
+                STR(STRING_CHAINING_THRESHOLD)
+                " not allowed inside alternation (|)");
+
+            YYABORT;
+          }
+
           if ((yyvsp[(1) - (3)].integer) > (yyvsp[(3) - (3)].integer))
           {
             RE* re = yyget_extra(yyscanner);
             re->error_code = ERROR_INVALID_HEX_STRING;
-            re->error_message = yr_strdup("invalid range");
+            re->error_message = yr_strdup("invalid jump range");
             YYABORT;
           }
 
@@ -1508,7 +1537,7 @@ yyreduce:
     break;
 
   case 11:
-#line 173 "hex_grammar.y"
+#line 202 "hex_grammar.y"
     {
           RE_NODE* re_any;
 
@@ -1516,7 +1545,9 @@ yyreduce:
           {
             RE* re = yyget_extra(yyscanner);
             re->error_code = ERROR_INVALID_HEX_STRING;
-            re->error_message = yr_strdup("[..] not allowed inside OR (|)");
+            re->error_message = yr_strdup(
+                "unbounded jumps not allowed inside alternation (|)");
+
             YYABORT;
           }
 
@@ -1528,20 +1559,47 @@ yyreduce:
 
           ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
 
-          (yyval.re_node)->start = 0;
+          (yyval.re_node)->start = (yyvsp[(1) - (2)].integer);
           (yyval.re_node)->end = INT_MAX;
         }
     break;
 
   case 12:
-#line 199 "hex_grammar.y"
+#line 227 "hex_grammar.y"
+    {
+          RE_NODE* re_any;
+
+          if (lex_env->inside_or)
+          {
+            RE* re = yyget_extra(yyscanner);
+            re->error_code = ERROR_INVALID_HEX_STRING;
+            re->error_message = yr_strdup(
+                "unbounded jumps not allowed inside alternation (|)");
+            YYABORT;
+          }
+
+          re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+
+          ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
+
+          (yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, re_any, NULL);
+
+          ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
+
+          (yyval.re_node)->start = 0;
+          (yyval.re_node)->end = INT_MAX;
+        }
+    break;
+
+  case 13:
+#line 254 "hex_grammar.y"
     {
                   (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
                }
     break;
 
-  case 13:
-#line 203 "hex_grammar.y"
+  case 14:
+#line 258 "hex_grammar.y"
     {
                   mark_as_not_fast_hex_regexp();
 
@@ -1553,8 +1611,8 @@ yyreduce:
                }
     break;
 
-  case 14:
-#line 215 "hex_grammar.y"
+  case 15:
+#line 270 "hex_grammar.y"
     {
           (yyval.re_node) = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
 
@@ -1564,8 +1622,8 @@ yyreduce:
         }
     break;
 
-  case 15:
-#line 223 "hex_grammar.y"
+  case 16:
+#line 278 "hex_grammar.y"
     {
           uint8_t mask = (yyvsp[(1) - (1)].integer) >> 8;
 
@@ -1589,7 +1647,7 @@ yyreduce:
 
 
 /* Line 1267 of yacc.c.  */
-#line 1593 "hex_grammar.c"
+#line 1651 "hex_grammar.c"
       default: break;
     }
   YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1803,7 +1861,7 @@ yyreturn:
 }
 
 
-#line 244 "hex_grammar.y"
+#line 299 "hex_grammar.y"
 
 
 
diff --git a/libyara/hex_grammar.h b/libyara/hex_grammar.h
index bc213c9..1b67276 100644
--- a/libyara/hex_grammar.h
+++ b/libyara/hex_grammar.h
@@ -54,7 +54,7 @@
 
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 typedef union YYSTYPE
-#line 71 "hex_grammar.y"
+#line 74 "hex_grammar.y"
 {
   int integer;
   RE_NODE *re_node;
diff --git a/libyara/hex_grammar.y b/libyara/hex_grammar.y
index 3050e04..e98073f 100644
--- a/libyara/hex_grammar.y
+++ b/libyara/hex_grammar.y
@@ -30,6 +30,9 @@ limitations under the License.
 #include <dmalloc.h>
 #endif
 
+#define STR_EXPAND(tok) #tok
+#define STR(tok) STR_EXPAND(tok)
+
 #define YYERROR_VERBOSE
 
 #define YYDEBUG 0
@@ -135,6 +138,18 @@ range : _NUMBER_
         {
           RE_NODE* re_any;
 
+          if (lex_env->inside_or && $1 > STRING_CHAINING_THRESHOLD)
+          {
+            RE* re = yyget_extra(yyscanner);
+            re->error_code = ERROR_INVALID_HEX_STRING;
+            re->error_message = yr_strdup(
+                "jumps over "
+                STR(STRING_CHAINING_THRESHOLD)
+                " not allowed inside alternation (|)");
+
+            YYABORT;
+          }
+
           re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
 
           ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -150,11 +165,25 @@ range : _NUMBER_
         {
           RE_NODE* re_any;
 
+          if (lex_env->inside_or &&
+              ($1 > STRING_CHAINING_THRESHOLD ||
+               $3 > STRING_CHAINING_THRESHOLD) )
+          {
+            RE* re = yyget_extra(yyscanner);
+            re->error_code = ERROR_INVALID_HEX_STRING;
+            re->error_message = yr_strdup(
+                "jumps over "
+                STR(STRING_CHAINING_THRESHOLD)
+                " not allowed inside alternation (|)");
+
+            YYABORT;
+          }
+
           if ($1 > $3)
           {
             RE* re = yyget_extra(yyscanner);
             re->error_code = ERROR_INVALID_HEX_STRING;
-            re->error_message = yr_strdup("invalid range");
+            re->error_message = yr_strdup("invalid jump range");
             YYABORT;
           }
 
@@ -169,7 +198,32 @@ range : _NUMBER_
           $$->start = $1;
           $$->end = $3;
         }
-      | '.' '.'
+      | _NUMBER_ '-'
+        {
+          RE_NODE* re_any;
+
+          if (lex_env->inside_or)
+          {
+            RE* re = yyget_extra(yyscanner);
+            re->error_code = ERROR_INVALID_HEX_STRING;
+            re->error_message = yr_strdup(
+                "unbounded jumps not allowed inside alternation (|)");
+
+            YYABORT;
+          }
+
+          re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+
+          ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
+
+          $$ = yr_re_node_create(RE_NODE_RANGE, re_any, NULL);
+
+          ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
+
+          $$->start = $1;
+          $$->end = INT_MAX;
+        }
+      | '-'
         {
           RE_NODE* re_any;
 
@@ -177,7 +231,8 @@ range : _NUMBER_
           {
             RE* re = yyget_extra(yyscanner);
             re->error_code = ERROR_INVALID_HEX_STRING;
-            re->error_message = yr_strdup("[..] not allowed inside OR (|)");
+            re->error_message = yr_strdup(
+                "unbounded jumps not allowed inside alternation (|)");
             YYABORT;
           }
 
diff --git a/libyara/hex_lexer.c b/libyara/hex_lexer.c
index d462d99..271d58d 100644
--- a/libyara/hex_lexer.c
+++ b/libyara/hex_lexer.c
@@ -47,6 +47,7 @@ typedef int16_t flex_int16_t;
 typedef uint16_t flex_uint16_t;
 typedef int32_t flex_int32_t;
 typedef uint32_t flex_uint32_t;
+typedef uint64_t flex_uint64_t;
 #else
 typedef signed char flex_int8_t;
 typedef short int flex_int16_t;
@@ -357,7 +358,7 @@ static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
  */
 #define YY_DO_BEFORE_ACTION \
 	yyg->yytext_ptr = yy_bp; \
-	yyleng = (size_t) (yy_cp - yy_bp); \
+	yyleng = (yy_size_t) (yy_cp - yy_bp); \
 	yyg->yy_hold_char = *yy_cp; \
 	*yy_cp = '\0'; \
 	yyg->yy_c_buf_p = yy_cp;
@@ -505,7 +506,7 @@ limitations under the License.
 #define YY_NO_UNISTD_H 1
 #define YY_NO_INPUT 1
 
-#line 509 "hex_lexer.c"
+#line 510 "hex_lexer.c"
 
 #define INITIAL 0
 #define range 1
@@ -741,7 +742,7 @@ YY_DECL
 
 
 
-#line 745 "hex_lexer.c"
+#line 746 "hex_lexer.c"
 
     yylval = yylval_param;
 
@@ -905,19 +906,12 @@ YY_RULE_SETUP
 {
 
   yylval->integer = atoi(yytext);
-
-  if (yylval->integer > INT16_MAX)
-  {
-    yyerror(yyscanner, lex_env, "range value too large");
-    yyterminate();
-  }
-
   return _NUMBER_;
 }
 	YY_BREAK
 case 9:
 YY_RULE_SETUP
-#line 124 "hex_lexer.l"
+#line 117 "hex_lexer.l"
 {
 
   BEGIN(INITIAL);
@@ -927,12 +921,12 @@ YY_RULE_SETUP
 case 10:
 /* rule 10 can match eol */
 YY_RULE_SETUP
-#line 131 "hex_lexer.l"
+#line 124 "hex_lexer.l"
 // skip whitespace
 	YY_BREAK
 case 11:
 YY_RULE_SETUP
-#line 134 "hex_lexer.l"
+#line 127 "hex_lexer.l"
 {
 
   if (yytext[0] >= 32 && yytext[0] < 127)
@@ -948,10 +942,10 @@ YY_RULE_SETUP
 	YY_BREAK
 case 12:
 YY_RULE_SETUP
-#line 147 "hex_lexer.l"
+#line 140 "hex_lexer.l"
 ECHO;
 	YY_BREAK
-#line 955 "hex_lexer.c"
+#line 949 "hex_lexer.c"
 case YY_STATE_EOF(INITIAL):
 case YY_STATE_EOF(range):
 	yyterminate();
@@ -2087,7 +2081,7 @@ void hex_yyfree (void * ptr , yyscan_t yyscanner)
 
 #define YYTABLES_NAME "yytables"
 
-#line 147 "hex_lexer.l"
+#line 140 "hex_lexer.l"
 
 
 
diff --git a/libyara/hex_lexer.l b/libyara/hex_lexer.l
index c9e1df0..ea963e1 100644
--- a/libyara/hex_lexer.l
+++ b/libyara/hex_lexer.l
@@ -111,13 +111,6 @@ hexdigit      [a-fA-F0-9]
 <range>{digit}+ {
 
   yylval->integer = atoi(yytext);
-
-  if (yylval->integer > INT16_MAX)
-  {
-    yyerror(yyscanner, lex_env, "range value too large");
-    yyterminate();
-  }
-
   return _NUMBER_;
 }
 
diff --git a/libyara/lexer.c b/libyara/lexer.c
index cc3ed40..68f6588 100644
--- a/libyara/lexer.c
+++ b/libyara/lexer.c
@@ -47,6 +47,7 @@ typedef int16_t flex_int16_t;
 typedef uint16_t flex_uint16_t;
 typedef int32_t flex_int32_t;
 typedef uint32_t flex_uint32_t;
+typedef uint64_t flex_uint64_t;
 #else
 typedef signed char flex_int8_t;
 typedef short int flex_int16_t;
@@ -357,7 +358,7 @@ static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
  */
 #define YY_DO_BEFORE_ACTION \
 	yyg->yytext_ptr = yy_bp; \
-	yyleng = (size_t) (yy_cp - yy_bp); \
+	yyleng = (yy_size_t) (yy_cp - yy_bp); \
 	yyg->yy_hold_char = *yy_cp; \
 	*yy_cp = '\0'; \
 	yyg->yy_c_buf_p = yy_cp;
@@ -406,7 +407,7 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    4,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    2,    5,    6,    7,    8,    1,    1,    1,    9,
-        9,   10,    1,    1,    9,    9,   11,   12,   13,   14,
+        9,   10,    1,    1,    9,    1,   11,   12,   13,   14,
        15,   16,   16,   17,   16,   18,   16,    1,    1,   19,
        20,   21,    9,   22,   23,   24,   23,   23,   23,   23,
        25,   25,   25,   25,   26,   25,   27,   25,   25,   25,
@@ -686,7 +687,7 @@ limitations under the License.
 
 
 
-#line 690 "lexer.c"
+#line 691 "lexer.c"
 
 #define INITIAL 0
 #define str 1
@@ -924,7 +925,7 @@ YY_DECL
 #line 86 "lexer.l"
 
 
-#line 928 "lexer.c"
+#line 929 "lexer.c"
 
     yylval = yylval_param;
 
@@ -1710,7 +1711,7 @@ YY_RULE_SETUP
 #line 512 "lexer.l"
 ECHO;
 	YY_BREAK
-#line 1714 "lexer.c"
+#line 1715 "lexer.c"
 
 	case YY_END_OF_BUFFER:
 		{
diff --git a/libyara/lexer.l b/libyara/lexer.l
index a54982e..2d579ea 100644
--- a/libyara/lexer.l
+++ b/libyara/lexer.l
@@ -479,7 +479,7 @@ $({letter}|{digit}|_)*  {
 }
 
 
-\{({hexdigit}|[ \-|\?\[\]\(\)\.\n\t])+\}  {
+\{({hexdigit}|[ \-|\?\[\]\(\)\n\t])+\}  {
 
   int len = strlen(yytext);
   SIZED_STRING* s = (SIZED_STRING*) yr_malloc(len + sizeof(SIZED_STRING));
diff --git a/libyara/re.c b/libyara/re.c
index ae589f4..611c546 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -322,7 +322,9 @@ SIZED_STRING* yr_re_extract_literal(
 int yr_re_split_at_chaining_point(
     RE* re,
     RE** result_re,
-    RE** remainder_re)
+    RE** remainder_re,
+    int32_t* min_gap,
+    int32_t* max_gap)
 {
   RE_NODE* node = re->root_node;
   RE_NODE* child = re->root_node->left;
@@ -332,15 +334,17 @@ int yr_re_split_at_chaining_point(
 
   *result_re = re;
   *remainder_re = NULL;
+  *min_gap = 0;
+  *max_gap = 0;
 
   while (child != NULL && child->type == RE_NODE_CONCAT)
   {
     if (child->right != NULL &&
         child->right->type == RE_NODE_RANGE &&
         child->right->greedy == FALSE &&
-        child->right->start == 0 &&
-        child->right->end == INT_MAX &&
-        child->right->left->type == RE_NODE_ANY)
+        child->right->left->type == RE_NODE_ANY &&
+        (child->right->start > STRING_CHAINING_THRESHOLD ||
+         child->right->end > STRING_CHAINING_THRESHOLD))
     {
       result = yr_re_create(remainder_re);
 
@@ -357,6 +361,10 @@ int yr_re_split_at_chaining_point(
         (*result_re)->root_node = node->right;
 
       node->right = NULL;
+
+      *min_gap = child->right->start;
+      *max_gap = child->right->end;
+
       yr_re_node_destroy(node);
 
       return ERROR_SUCCESS;
diff --git a/libyara/re.h b/libyara/re.h
index f1a0dae..835c0b8 100644
--- a/libyara/re.h
+++ b/libyara/re.h
@@ -166,10 +166,13 @@ SIZED_STRING* yr_re_extract_literal(
     RE* re);
 
 
+
 int yr_re_split_at_chaining_point(
     RE* re,
     RE** result_re,
-    RE** remainder_re);
+    RE** remainder_re,
+    int32_t* min_gap,
+    int32_t* max_gap);
 
 
 int yr_re_emit_code(
diff --git a/libyara/rules.c b/libyara/rules.c
index 104db1d..be48741 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -1,4 +1,4 @@
- /*
+    /*
 Copyright (c) 2013. Victor M. Alvarez [plusvic at gmail.com].
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -304,7 +304,7 @@ void _yr_scan_confirm_matches(
     int tidx,
     YR_STRING* string,
     size_t match_offset,
-    int match_length)
+    int32_t match_length)
 {
   YR_MATCH* match;
   YR_MATCH* next_match;
@@ -353,18 +353,220 @@ void _yr_scan_confirm_matches(
 }
 
 
-void _yr_rules_match_callback(
+void _yr_scan_update_match_chain_length(
+    int tidx,
+    YR_STRING* string,
+    YR_MATCH* match_to_update,
+    int chain_length)
+{
+  YR_MATCH* match;
+  size_t ending_offset;
+
+  match_to_update->chain_length = chain_length;
+
+  if (string->chained_to != NULL)
+    match = string->chained_to->unconfirmed_matches[tidx].head;
+  else
+    match = NULL;
+
+  while (match != NULL)
+  {
+    ending_offset = match->offset + match->length;
+
+    if (ending_offset + string->chain_gap_max >= match_to_update->offset &&
+        ending_offset + string->chain_gap_min <= match_to_update->offset)
+    {
+      _yr_scan_update_match_chain_length(
+          tidx, string->chained_to, match, chain_length + 1);
+    }
+
+    match = match->next;
+  }
+}
+
+
+void _yr_scan_add_match_to_list(
+    YR_MATCH* match,
+    YR_MATCHES* matches_list)
+{
+  YR_MATCH* insertion_point;
+
+  insertion_point = matches_list->tail;
+
+  while (insertion_point != NULL)
+  {
+    if (match->offset == insertion_point->offset)
+    {
+      insertion_point->length = match->length;
+      return;
+    }
+
+    if (match->offset > insertion_point->offset)
+      break;
+
+    insertion_point = insertion_point->prev;
+  }
+
+  match->prev = insertion_point;
+
+  if (insertion_point != NULL)
+  {
+    match->next = insertion_point->next;
+    insertion_point->next = match;
+  }
+  else
+  {
+    match->next = matches_list->head;
+    matches_list->head = match;
+  }
+
+  if (match->next != NULL)
+    match->next->prev = match;
+  else
+    matches_list->tail = match;
+}
+
+
+void _yr_scan_remove_match_from_list(
+    YR_MATCH* match,
+    YR_MATCHES* matches_list)
+{
+  if (match->prev != NULL)
+    match->prev->next = match->next;
+
+  if (match->next != NULL)
+    match->next->prev = match->prev;
+
+  if (matches_list->head == match)
+    matches_list->head = match->next;
+
+  if (matches_list->tail == match)
+    matches_list->tail = match->prev;
+}
+
+void _yr_scan_handle_chained_matches(
+    YR_ARENA* matches_arena,
+    YR_STRING* matching_string,
     uint8_t* match_data,
-    int match_length,
-    int flags,
-    void* args)
+    size_t match_offset,
+    int32_t match_length,
+    int tidx)
 {
-  YR_MATCH* new_match;
+  YR_STRING* string;
   YR_MATCH* match;
-  YR_MATCHES* matches;
+  YR_MATCH* next_match;
+  YR_MATCH* new_match;
+
+  size_t lower_offset;
+  size_t ending_offset;
+  int32_t full_chain_length;
+
+  int add_match = FALSE;
+
+  if (matching_string->chained_to == NULL)
+  {
+    add_match = TRUE;
+  }
+  else
+  {
+    if (matching_string->unconfirmed_matches[tidx].head != NULL)
+      lower_offset = matching_string->unconfirmed_matches[tidx].head->offset;
+    else
+      lower_offset = match_offset;
+
+    match = matching_string->chained_to->unconfirmed_matches[tidx].head;
 
+    while (match != NULL)
+    {
+      next_match = match->next;
+      ending_offset = match->offset + match->length;
+
+      if (ending_offset + matching_string->chain_gap_max < lower_offset)
+      {
+        _yr_scan_remove_match_from_list(
+            match, &matching_string->chained_to->unconfirmed_matches[tidx]);
+      }
+      else
+      {
+        if (ending_offset + matching_string->chain_gap_max >= match_offset &&
+            ending_offset + matching_string->chain_gap_min <= match_offset)
+        {
+          _yr_scan_update_match_chain_length(
+              tidx, matching_string->chained_to, match, 1);
+
+          add_match = TRUE;
+        }
+      }
+
+      match = next_match;
+    }
+  }
+
+  if (add_match)
+  {
+    if (STRING_IS_CHAIN_TAIL(matching_string))
+    {
+      full_chain_length = 0;
+      string = matching_string;
+
+      while(string->chained_to != NULL)
+      {
+        full_chain_length++;
+        string = string->chained_to;
+      }
+
+      // "string" now points to the head of the string chain
+
+      match = string->unconfirmed_matches[tidx].head;
+
+      while (match != NULL)
+      {
+        next_match = match->next;
+
+        if (match->chain_length == full_chain_length)
+        {
+          _yr_scan_remove_match_from_list(
+              match, &string->unconfirmed_matches[tidx]);
+
+          match->length = match_offset - match->offset + match_length;
+          match->data = match_data - match_offset + match->offset;
+
+          _yr_scan_add_match_to_list(
+              match, &string->matches[tidx]);
+        }
+
+        match = next_match;
+      }
+    }
+    else
+    {
+      yr_arena_allocate_memory(
+          matches_arena,
+          sizeof(YR_MATCH),
+          (void**) &new_match);
+
+      new_match->offset = match_offset;
+      new_match->length = match_length;
+      new_match->data = match_data;
+
+      _yr_scan_add_match_to_list(
+          new_match,
+          &matching_string->unconfirmed_matches[tidx]);
+    }
+  }
+}
+
+
+void _yr_scan_match_callback(
+    uint8_t* match_data,
+    int32_t match_length,
+    int flags,
+    void* args)
+{
   CALLBACK_ARGS* callback_args = args;
+
   YR_STRING* string = callback_args->string;
+  YR_MATCH* new_match;
 
   int character_size;
   int tidx = callback_args->tidx;
@@ -384,6 +586,7 @@ void _yr_rules_match_callback(
     match_length -= character_size;
 
   // total match length is the sum of backward and forward matches.
+
   match_length = match_length + callback_args->forward_matches;
 
   if (callback_args->full_word)
@@ -412,60 +615,31 @@ void _yr_rules_match_callback(
     }
   }
 
-  if (STRING_IS_CHAIN_TAIL(string))
-  {
-    _yr_scan_confirm_matches(tidx, string, match_offset, match_length);
-    return;
-  }
-
   if (STRING_IS_CHAIN_PART(string))
-    matches = &string->unconfirmed_matches[tidx];
-  else
-    matches = &string->matches[tidx];
-
-  match = matches->tail;
-
-  while (match != NULL)
-  {
-    if (match_length == match->length)
-    {
-      if (match_offset == match->offset)
-        return;
-    }
-
-    if (match_offset > match->offset)
-      break;
-
-    match = match->prev;
-  }
-
-  yr_arena_allocate_memory(
-      callback_args->matches_arena,
-      sizeof(YR_MATCH),
-      (void**) &new_match);
-
-  new_match->offset = match_offset;
-  new_match->length = match_length;
-  new_match->data = match_data;
-
-  if (match != NULL)
   {
-    new_match->next = match->next;
-    new_match->prev = match;
-    match->next = new_match;
+    _yr_scan_handle_chained_matches(
+        callback_args->matches_arena,
+        string,
+        match_data,
+        match_offset,
+        match_length,
+        tidx);
   }
   else
   {
-    new_match->next = matches->head;
-    matches->head = new_match;
+    yr_arena_allocate_memory(
+        callback_args->matches_arena,
+        sizeof(YR_MATCH),
+        (void**) &new_match);
+
+    new_match->offset = match_offset;
+    new_match->length = match_length;
+    new_match->data = match_data;
+
+    _yr_scan_add_match_to_list(
+        new_match,
+        &string->matches[tidx]);
   }
-
-  if (new_match->next != NULL)
-    new_match->next->prev = new_match;
-  else
-    matches->tail = new_match;
-
-  new_match->prev = match;
 }
 
 
@@ -547,12 +721,12 @@ int _yr_scan_verify_re_match(
         data + offset,
         offset + 1,
         flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
-        _yr_rules_match_callback,
+        _yr_scan_match_callback,
         (void*) &callback_args);
   }
   else
   {
-    _yr_rules_match_callback(
+    _yr_scan_match_callback(
         data + offset, 0, flags, &callback_args);
   }
 
@@ -661,7 +835,7 @@ int _yr_scan_verify_literal_match(
     callback_args.full_word = STRING_IS_FULL_WORD(string);
     callback_args.tidx = yr_get_tidx();
 
-    _yr_rules_match_callback(
+    _yr_scan_match_callback(
         data + offset, 0, flags, &callback_args);
   }
 
diff --git a/libyara/yara.h b/libyara/yara.h
index aff92f0..6c01e7b 100644
--- a/libyara/yara.h
+++ b/libyara/yara.h
@@ -95,12 +95,12 @@ typedef pthread_mutex_t mutex_t;
 #define CALLBACK_ABORT     1
 #define CALLBACK_ERROR     2
 
-
 #define MAX_ATOM_LENGTH 4
 #define LOOP_LOCAL_VARS 4
 #define MAX_LOOP_NESTING 4
 #define MAX_INCLUDE_DEPTH 16
 #define MAX_THREADS 32
+#define STRING_CHAINING_THRESHOLD 256
 #define LEX_BUF_SIZE  1024
 
 
@@ -302,9 +302,13 @@ typedef struct _YR_ARENA
 
 typedef struct _YR_MATCH
 {
-  uint8_t* data;
-  uint32_t length;
   int64_t offset;
+  int32_t length;
+
+  union {
+    uint8_t* data;            // Confirmed matches use "data",
+    int32_t chain_length;    // unconfirmed ones use "chain_length"
+  };
 
   struct _YR_MATCH*  prev;
   struct _YR_MATCH*  next;
@@ -348,6 +352,9 @@ typedef struct _YR_STRING
   DECLARE_REFERENCE(uint8_t*, string);
   DECLARE_REFERENCE(struct _YR_STRING*, chained_to);
 
+  int32_t chain_gap_min;
+  int32_t chain_gap_max;
+
   YR_MATCHES matches[MAX_THREADS];
   YR_MATCHES unconfirmed_matches[MAX_THREADS];
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git



More information about the forensics-changes mailing list