[Forensics-changes] [yara] 311/415: Implement unbounded jumps in hex strings by chaining multiple strings

Hilko Bengen bengen at moszumanska.debian.org
Thu Apr 3 05:43:18 UTC 2014


This is an automated email from the git hooks/post-receive script.

bengen pushed a commit to branch debian
in repository yara.

commit ddf3dda4de4ecc3d4a7ad04ce005aff80a36fa73
Author: Victor M. Alvarez <plusvic at gmail.com>
Date:   Tue Dec 17 15:55:03 2013 +0100

    Implement unbounded jumps in hex strings by chaining multiple strings
---
 libyara/hex_grammar.c | 217 ++++++++++++++++--------------
 libyara/hex_grammar.h |   2 +-
 libyara/hex_grammar.y |  56 ++++----
 libyara/hex_lexer.c   | 113 ++++++++--------
 libyara/hex_lexer.h   |   1 +
 libyara/hex_lexer.l   |  12 +-
 libyara/lexer.c       |   2 +-
 libyara/lexer.l       |   2 +-
 libyara/parser.c      | 359 +++++++++++++++++++++++++++++++-------------------
 libyara/re.c          | 131 ++++++++++++++++--
 libyara/re.h          |  19 ++-
 libyara/re_grammar.c  | 127 +++++++-----------
 libyara/re_grammar.h  |   2 +-
 libyara/re_grammar.y  |  39 ------
 libyara/re_lexer.c    |   8 --
 libyara/re_lexer.l    |   8 --
 libyara/rules.c       |  92 ++++++++++---
 libyara/yara.h        |  84 ++++++------
 18 files changed, 741 insertions(+), 533 deletions(-)

diff --git a/libyara/hex_grammar.c b/libyara/hex_grammar.c
index 8d27cf3..d57ab40 100644
--- a/libyara/hex_grammar.c
+++ b/libyara/hex_grammar.c
@@ -92,6 +92,7 @@
 
 
 #include <stdint.h>
+#include <limits.h>
 
 #include "hex_lexer.h"
 #include "mem.h"
@@ -108,9 +109,6 @@
 
 #define YYDEBUG 0
 
-#define mark_as_not_literal() \
-    ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
-
 #define mark_as_not_fast_hex_regexp() \
     ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_FAST_HEX_REGEXP
 
@@ -154,13 +152,13 @@ yydebug = 1;
 
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 typedef union YYSTYPE
-#line 73 "hex_grammar.y"
+#line 71 "hex_grammar.y"
 {
   int integer;
   RE_NODE *re_node;
 }
 /* Line 193 of yacc.c.  */
-#line 164 "hex_grammar.c"
+#line 162 "hex_grammar.c"
 	YYSTYPE;
 # define yystype YYSTYPE /* obsolescent; will be withdrawn */
 # define YYSTYPE_IS_DECLARED 1
@@ -173,7 +171,7 @@ typedef union YYSTYPE
 
 
 /* Line 216 of yacc.c.  */
-#line 177 "hex_grammar.c"
+#line 175 "hex_grammar.c"
 
 #ifdef short
 # undef short
@@ -391,13 +389,13 @@ union yyalloc
 #define YYLAST   23
 
 /* YYNTOKENS -- Number of terminals.  */
-#define YYNTOKENS  14
+#define YYNTOKENS  15
 /* YYNNTS -- Number of nonterminals.  */
-#define YYNNTS  7
+#define YYNNTS  8
 /* YYNRULES -- Number of rules.  */
-#define YYNRULES  13
+#define YYNRULES  15
 /* YYNRULES -- Number of states.  */
-#define YYNSTATES  23
+#define YYNSTATES  26
 
 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX.  */
 #define YYUNDEFTOK  2
@@ -413,7 +411,7 @@ static const yytype_uint8 yytranslate[] =
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       8,     9,     2,     2,     2,    12,     2,     2,     2,     2,
+       8,     9,     2,     2,     2,    12,    13,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
@@ -421,7 +419,7 @@ static const yytype_uint8 yytranslate[] =
        2,    10,     2,    11,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     6,    13,     7,     2,     2,     2,     2,
+       2,     2,     2,     6,    14,     7,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
@@ -443,24 +441,25 @@ static const yytype_uint8 yytranslate[] =
    YYRHS.  */
 static const yytype_uint8 yyprhs[] =
 {
-       0,     0,     3,     7,     9,    12,    14,    18,    22,    24,
-      28,    30,    34,    36
+       0,     0,     3,     7,     9,    12,    14,    15,    20,    24,
+      26,    30,    33,    35,    39,    41
 };
 
 /* YYRHS -- A `-1'-separated list of the rules' RHS.  */
 static const yytype_int8 yyrhs[] =
 {
-      15,     0,    -1,     6,    16,     7,    -1,    17,    -1,    16,
-      17,    -1,    20,    -1,     8,    19,     9,    -1,    10,    18,
-      11,    -1,     5,    -1,     5,    12,     5,    -1,    16,    -1,
-      19,    13,    16,    -1,     3,    -1,     4,    -1
+      16,     0,    -1,     6,    17,     7,    -1,    18,    -1,    17,
+      18,    -1,    22,    -1,    -1,     8,    19,    21,     9,    -1,
+      10,    20,    11,    -1,     5,    -1,     5,    12,     5,    -1,
+      13,    13,    -1,    17,    -1,    21,    14,    17,    -1,     3,
+      -1,     4,    -1
 };
 
 /* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
 static const yytype_uint8 yyrline[] =
 {
-       0,    92,    92,   100,   104,   115,   119,   123,   132,   147,
-     173,   177,   190,   214
+       0,    90,    90,    98,   102,   113,   118,   117,   126,   134,
+     149,   172,   198,   202,   214,   222
 };
 #endif
 
@@ -470,8 +469,8 @@ static const yytype_uint8 yyrline[] =
 static const char *const yytname[] =
 {
   "$end", "error", "$undefined", "_BYTE_", "_MASKED_BYTE_", "_NUMBER_",
-  "'{'", "'}'", "'('", "')'", "'['", "']'", "'-'", "'|'", "$accept",
-  "hex_string", "tokens", "token", "range", "alternatives", "byte", 0
+  "'{'", "'}'", "'('", "')'", "'['", "']'", "'-'", "'.'", "'|'", "$accept",
+  "hex_string", "tokens", "token", "@1", "range", "alternatives", "byte", 0
 };
 #endif
 
@@ -481,22 +480,22 @@ static const char *const yytname[] =
 static const yytype_uint16 yytoknum[] =
 {
        0,   256,   257,   258,   259,   260,   123,   125,    40,    41,
-      91,    93,    45,   124
+      91,    93,    45,    46,   124
 };
 # endif
 
 /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.  */
 static const yytype_uint8 yyr1[] =
 {
-       0,    14,    15,    16,    16,    17,    17,    17,    18,    18,
-      19,    19,    20,    20
+       0,    15,    16,    17,    17,    18,    19,    18,    18,    20,
+      20,    20,    21,    21,    22,    22
 };
 
 /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.  */
 static const yytype_uint8 yyr2[] =
 {
-       0,     2,     3,     1,     2,     1,     3,     3,     1,     3,
-       1,     3,     1,     1
+       0,     2,     3,     1,     2,     1,     0,     4,     3,     1,
+       3,     2,     1,     3,     1,     1
 };
 
 /* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
@@ -504,31 +503,31 @@ static const yytype_uint8 yyr2[] =
    means the default is an error.  */
 static const yytype_uint8 yydefact[] =
 {
-       0,     0,     0,    12,    13,     0,     0,     0,     3,     5,
-       1,    10,     0,     8,     0,     2,     4,     6,     0,     0,
-       7,    11,     9
+       0,     0,     0,    14,    15,     6,     0,     0,     3,     5,
+       1,     0,     9,     0,     0,     2,     4,    12,     0,     0,
+      11,     8,     7,     0,    10,    13
 };
 
 /* YYDEFGOTO[NTERM-NUM].  */
 static const yytype_int8 yydefgoto[] =
 {
-      -1,     2,     7,     8,    14,    12,     9
+      -1,     2,     7,     8,    11,    14,    18,     9
 };
 
 /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
    STATE-NUM.  */
-#define YYPACT_NINF -8
+#define YYPACT_NINF -11
 static const yytype_int8 yypact[] =
 {
-      -5,    13,     8,    -8,    -8,    13,     6,     2,    -8,    -8,
-      -8,    13,    -6,     1,     7,    -8,    -8,    -8,    13,    14,
-      -8,    13,    -8
+      -4,     4,     5,   -11,   -11,   -11,    -2,    12,   -11,   -11,
+     -11,     4,    -6,     8,     6,   -11,   -11,     4,    -5,    18,
+     -11,   -11,   -11,     4,   -11,     4
 };
 
 /* YYPGOTO[NTERM-NUM].  */
 static const yytype_int8 yypgoto[] =
 {
-      -8,    -8,    -3,    -7,    -8,    -8,    -8
+     -11,   -11,   -10,    -7,   -11,   -11,   -11,   -11
 };
 
 /* YYTABLE[YYPACT[STATE-NUM]].  What to do in state STATE-NUM.  If
@@ -538,25 +537,25 @@ static const yytype_int8 yypgoto[] =
 #define YYTABLE_NINF -1
 static const yytype_uint8 yytable[] =
 {
-      16,     1,    11,    17,    16,     3,     4,    18,    10,    15,
-       5,    13,     6,    19,    16,    21,     3,     4,    20,    22,
-       0,     5,     0,     6
+      16,    17,     1,    12,    22,    10,    19,     3,     4,    23,
+      16,    13,     5,    25,     6,     3,     4,    21,    16,    15,
+       5,    20,     6,    24
 };
 
-static const yytype_int8 yycheck[] =
+static const yytype_uint8 yycheck[] =
 {
-       7,     6,     5,     9,    11,     3,     4,    13,     0,     7,
-       8,     5,    10,    12,    21,    18,     3,     4,    11,     5,
-      -1,     8,    -1,    10
+       7,    11,     6,     5,     9,     0,    12,     3,     4,    14,
+      17,    13,     8,    23,    10,     3,     4,    11,    25,     7,
+       8,    13,    10,     5
 };
 
 /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
    symbol of state STATE-NUM.  */
 static const yytype_uint8 yystos[] =
 {
-       0,     6,    15,     3,     4,     8,    10,    16,    17,    20,
-       0,    16,    19,     5,    18,     7,    17,     9,    13,    12,
-      11,    16,     5
+       0,     6,    16,     3,     4,     8,    10,    17,    18,    22,
+       0,    19,     5,    13,    20,     7,    18,    17,    21,    12,
+      13,    11,     9,    14,     5,    17
 };
 
 #define yyerrok		(yyerrstatus = 0)
@@ -1076,30 +1075,30 @@ yydestruct (yymsg, yytype, yyvaluep, yyscanner, lex_env)
 
   switch (yytype)
     {
-      case 16: /* "tokens" */
-#line 84 "hex_grammar.y"
+      case 17: /* "tokens" */
+#line 82 "hex_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1083 "hex_grammar.c"
+#line 1082 "hex_grammar.c"
 	break;
-      case 17: /* "token" */
-#line 85 "hex_grammar.y"
+      case 18: /* "token" */
+#line 83 "hex_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1088 "hex_grammar.c"
+#line 1087 "hex_grammar.c"
 	break;
-      case 18: /* "range" */
-#line 88 "hex_grammar.y"
+      case 20: /* "range" */
+#line 86 "hex_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1093 "hex_grammar.c"
+#line 1092 "hex_grammar.c"
 	break;
-      case 19: /* "alternatives" */
-#line 87 "hex_grammar.y"
+      case 21: /* "alternatives" */
+#line 85 "hex_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1098 "hex_grammar.c"
+#line 1097 "hex_grammar.c"
 	break;
-      case 20: /* "byte" */
-#line 86 "hex_grammar.y"
+      case 22: /* "byte" */
+#line 84 "hex_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1103 "hex_grammar.c"
+#line 1102 "hex_grammar.c"
 	break;
 
       default:
@@ -1409,7 +1408,7 @@ yyreduce:
   switch (yyn)
     {
         case 2:
-#line 93 "hex_grammar.y"
+#line 91 "hex_grammar.y"
     {
                 RE* re = yyget_extra(yyscanner);
                 re->root_node = (yyvsp[(2) - (3)].re_node);
@@ -1417,14 +1416,14 @@ yyreduce:
     break;
 
   case 3:
-#line 101 "hex_grammar.y"
+#line 99 "hex_grammar.y"
     {
             (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
          }
     break;
 
   case 4:
-#line 105 "hex_grammar.y"
+#line 103 "hex_grammar.y"
     {
             (yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
 
@@ -1435,30 +1434,37 @@ yyreduce:
     break;
 
   case 5:
-#line 116 "hex_grammar.y"
+#line 114 "hex_grammar.y"
     {
           (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
         }
     break;
 
   case 6:
-#line 120 "hex_grammar.y"
+#line 118 "hex_grammar.y"
     {
-          (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
+          lex_env->inside_or++;
         }
     break;
 
   case 7:
-#line 124 "hex_grammar.y"
+#line 122 "hex_grammar.y"
+    {
+          (yyval.re_node) = (yyvsp[(3) - (4)].re_node);
+          lex_env->inside_or--;
+        }
+    break;
+
+  case 8:
+#line 127 "hex_grammar.y"
     {
-          mark_as_not_literal();
           (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
           (yyval.re_node)->greedy = FALSE;
         }
     break;
 
-  case 8:
-#line 133 "hex_grammar.y"
+  case 9:
+#line 135 "hex_grammar.y"
     {
           RE_NODE* re_any;
 
@@ -1475,8 +1481,8 @@ yyreduce:
         }
     break;
 
-  case 9:
-#line 148 "hex_grammar.y"
+  case 10:
+#line 150 "hex_grammar.y"
     {
           RE_NODE* re_any;
 
@@ -1501,17 +1507,42 @@ yyreduce:
         }
     break;
 
-  case 10:
-#line 174 "hex_grammar.y"
+  case 11:
+#line 173 "hex_grammar.y"
+    {
+          RE_NODE* re_any;
+
+          if (lex_env->inside_or)
+          {
+            RE* re = yyget_extra(yyscanner);
+            re->error_code = ERROR_INVALID_HEX_STRING;
+            re->error_message = yr_strdup("[..] not allowed inside OR (|)");
+            YYABORT;
+          }
+
+          re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+
+          ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
+
+          (yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, re_any, NULL);
+
+          ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
+
+          (yyval.re_node)->start = 0;
+          (yyval.re_node)->end = INT_MAX;
+        }
+    break;
+
+  case 12:
+#line 199 "hex_grammar.y"
     {
                   (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
                }
     break;
 
-  case 11:
-#line 178 "hex_grammar.y"
+  case 13:
+#line 203 "hex_grammar.y"
     {
-                  mark_as_not_literal();
                   mark_as_not_fast_hex_regexp();
 
                   (yyval.re_node) = yr_re_node_create(RE_NODE_ALT, (yyvsp[(1) - (3)].re_node), (yyvsp[(3) - (3)].re_node));
@@ -1522,40 +1553,22 @@ yyreduce:
                }
     break;
 
-  case 12:
-#line 191 "hex_grammar.y"
+  case 14:
+#line 215 "hex_grammar.y"
     {
-          RE* re = yyget_extra(yyscanner);
-
           (yyval.re_node) = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
 
           ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
 
           (yyval.re_node)->value = (yyvsp[(1) - (1)].integer);
-
-          if (re->literal_string_len == re->literal_string_max)
-          {
-            re->literal_string_max *= 2;
-
-            re->literal_string = yr_realloc(
-                re->literal_string,
-                re->literal_string_max);
-
-            ERROR_IF(re->literal_string == NULL, ERROR_INSUFICIENT_MEMORY);
-          }
-
-          re->literal_string[re->literal_string_len] = (yyvsp[(1) - (1)].integer);
-          re->literal_string_len++;
         }
     break;
 
-  case 13:
-#line 215 "hex_grammar.y"
+  case 15:
+#line 223 "hex_grammar.y"
     {
           uint8_t mask = (yyvsp[(1) - (1)].integer) >> 8;
 
-          mark_as_not_literal();
-
           if (mask == 0x00)
           {
             (yyval.re_node) = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
@@ -1576,7 +1589,7 @@ yyreduce:
 
 
 /* Line 1267 of yacc.c.  */
-#line 1580 "hex_grammar.c"
+#line 1593 "hex_grammar.c"
       default: break;
     }
   YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1790,7 +1803,7 @@ yyreturn:
 }
 
 
-#line 238 "hex_grammar.y"
+#line 244 "hex_grammar.y"
 
 
 
diff --git a/libyara/hex_grammar.h b/libyara/hex_grammar.h
index 5d1606e..bc213c9 100644
--- a/libyara/hex_grammar.h
+++ b/libyara/hex_grammar.h
@@ -54,7 +54,7 @@
 
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 typedef union YYSTYPE
-#line 73 "hex_grammar.y"
+#line 71 "hex_grammar.y"
 {
   int integer;
   RE_NODE *re_node;
diff --git a/libyara/hex_grammar.y b/libyara/hex_grammar.y
index 3061104..3050e04 100644
--- a/libyara/hex_grammar.y
+++ b/libyara/hex_grammar.y
@@ -17,6 +17,7 @@ limitations under the License.
 %{
 
 #include <stdint.h>
+#include <limits.h>
 
 #include "hex_lexer.h"
 #include "mem.h"
@@ -33,9 +34,6 @@ limitations under the License.
 
 #define YYDEBUG 0
 
-#define mark_as_not_literal() \
-    ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
-
 #define mark_as_not_fast_hex_regexp() \
     ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_FAST_HEX_REGEXP
 
@@ -116,13 +114,17 @@ token : byte
         {
           $$ = $1;
         }
-      | '(' alternatives ')'
+      | '('
         {
-          $$ = $2;
+          lex_env->inside_or++;
+        }
+        alternatives ')'
+        {
+          $$ = $3;
+          lex_env->inside_or--;
         }
       | '[' range ']'
         {
-          mark_as_not_literal();
           $$ = $2;
           $$->greedy = FALSE;
         }
@@ -167,6 +169,29 @@ range : _NUMBER_
           $$->start = $1;
           $$->end = $3;
         }
+      | '.' '.'
+        {
+          RE_NODE* re_any;
+
+          if (lex_env->inside_or)
+          {
+            RE* re = yyget_extra(yyscanner);
+            re->error_code = ERROR_INVALID_HEX_STRING;
+            re->error_message = yr_strdup("[..] not allowed inside OR (|)");
+            YYABORT;
+          }
+
+          re_any = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
+
+          ERROR_IF(re_any == NULL, ERROR_INSUFICIENT_MEMORY);
+
+          $$ = yr_re_node_create(RE_NODE_RANGE, re_any, NULL);
+
+          ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
+
+          $$->start = 0;
+          $$->end = INT_MAX;
+        }
       ;
 
 
@@ -176,7 +201,6 @@ alternatives : tokens
                }
              | alternatives '|' tokens
                {
-                  mark_as_not_literal();
                   mark_as_not_fast_hex_regexp();
 
                   $$ = yr_re_node_create(RE_NODE_ALT, $1, $3);
@@ -189,34 +213,16 @@ alternatives : tokens
 
 byte  : _BYTE_
         {
-          RE* re = yyget_extra(yyscanner);
-
           $$ = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
 
           ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
 
           $$->value = $1;
-
-          if (re->literal_string_len == re->literal_string_max)
-          {
-            re->literal_string_max *= 2;
-
-            re->literal_string = yr_realloc(
-                re->literal_string,
-                re->literal_string_max);
-
-            ERROR_IF(re->literal_string == NULL, ERROR_INSUFICIENT_MEMORY);
-          }
-
-          re->literal_string[re->literal_string_len] = $1;
-          re->literal_string_len++;
         }
       | _MASKED_BYTE_
         {
           uint8_t mask = $1 >> 8;
 
-          mark_as_not_literal();
-
           if (mask == 0x00)
           {
             $$ = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
diff --git a/libyara/hex_lexer.c b/libyara/hex_lexer.c
index 25c6c36..d462d99 100644
--- a/libyara/hex_lexer.c
+++ b/libyara/hex_lexer.c
@@ -362,8 +362,8 @@ static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
 	*yy_cp = '\0'; \
 	yyg->yy_c_buf_p = yy_cp;
 
-#define YY_NUM_RULES 11
-#define YY_END_OF_BUFFER 12
+#define YY_NUM_RULES 12
+#define YY_END_OF_BUFFER 13
 /* This struct is not used in this scanner,
    but its presence is necessary. */
 struct yy_trans_info
@@ -371,11 +371,11 @@ struct yy_trans_info
 	flex_int32_t yy_verify;
 	flex_int32_t yy_nxt;
 	};
-static yyconst flex_int16_t yy_accept[22] =
+static yyconst flex_int16_t yy_accept[23] =
     {   0,
-        0,    0,    0,    0,   12,   10,    9,    9,   10,   10,
-        5,   11,    6,    7,    8,    1,    2,    3,    4,    7,
-        0
+        0,    0,    0,    0,   13,   11,   10,   10,   11,   11,
+        5,   12,    6,    7,    8,    9,    1,    2,    3,    4,
+        8,    0
     } ;
 
 static yyconst flex_int32_t yy_ec[256] =
@@ -384,14 +384,14 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    2,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    2,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    4,    1,    1,    5,    5,    5,
-        5,    5,    5,    5,    5,    5,    5,    1,    1,    1,
-        1,    1,    6,    1,    7,    7,    7,    7,    7,    7,
+        1,    1,    1,    1,    4,    5,    1,    6,    6,    6,
+        6,    6,    6,    6,    6,    6,    6,    1,    1,    1,
+        1,    1,    7,    1,    8,    8,    8,    8,    8,    8,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        8,    1,    9,    1,    1,    1,    7,    7,    7,    7,
+        9,    1,   10,    1,    1,    1,    8,    8,    8,    8,
 
-        7,    7,    1,    1,    1,    1,    1,    1,    1,    1,
+        8,    8,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
@@ -410,45 +410,45 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    1,    1,    1
     } ;
 
-static yyconst flex_int32_t yy_meta[10] =
+static yyconst flex_int32_t yy_meta[11] =
     {   0,
-        1,    1,    1,    1,    2,    2,    2,    1,    1
+        1,    1,    1,    1,    1,    2,    2,    2,    1,    1
     } ;
 
-static yyconst flex_int16_t yy_base[25] =
+static yyconst flex_int16_t yy_base[26] =
     {   0,
-        0,    0,    6,   12,   24,   25,   25,   25,   14,   13,
-       25,   25,   25,   13,   25,   25,   25,   25,   25,    9,
-       25,   21,   11,   10
+        0,    0,    7,   14,   27,   28,   28,   28,   16,   15,
+       28,   28,   28,   28,   15,   28,   28,   28,   28,   28,
+       10,   28,   24,   13,   12
     } ;
 
-static yyconst flex_int16_t yy_def[25] =
+static yyconst flex_int16_t yy_def[26] =
     {   0,
-       21,    1,   22,   22,   21,   21,   21,   21,   23,   24,
-       21,   21,   21,   21,   21,   21,   21,   21,   21,   21,
-        0,   21,   21,   21
+       22,    1,   23,   23,   22,   22,   22,   22,   24,   25,
+       22,   22,   22,   22,   22,   22,   22,   22,   22,   22,
+       22,    0,   22,   22,   22
     } ;
 
-static yyconst flex_int16_t yy_nxt[35] =
+static yyconst flex_int16_t yy_nxt[39] =
     {   0,
-        6,    7,    8,    6,    9,   10,    9,   11,    6,   13,
-       14,   18,   16,   20,   15,   13,   14,   20,   19,   17,
-       15,   12,   12,   21,    5,   21,   21,   21,   21,   21,
-       21,   21,   21,   21
+        6,    7,    8,    6,    6,    9,   10,    9,   11,    6,
+       13,   14,   15,   19,   17,   21,   16,   13,   14,   15,
+       21,   20,   18,   16,   12,   12,   22,    5,   22,   22,
+       22,   22,   22,   22,   22,   22,   22,   22
     } ;
 
-static yyconst flex_int16_t yy_chk[35] =
+static yyconst flex_int16_t yy_chk[39] =
     {   0,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    3,
-        3,   24,   23,   20,    3,    4,    4,   14,   10,    9,
-        4,   22,   22,    5,   21,   21,   21,   21,   21,   21,
-       21,   21,   21,   21
+        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
+        3,    3,    3,   25,   24,   21,    3,    4,    4,    4,
+       15,   10,    9,    4,   23,   23,    5,   22,   22,   22,
+       22,   22,   22,   22,   22,   22,   22,   22
     } ;
 
 /* Table of booleans, true if rule could match eol. */
-static yyconst flex_int32_t yy_rule_can_match_eol[12] =
+static yyconst flex_int32_t yy_rule_can_match_eol[13] =
     {   0,
-0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,     };
+0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,     };
 
 /* The intent behind this definition is that it'll catch
  * any uses of REJECT which flex missed.
@@ -796,13 +796,13 @@ yy_match:
 			while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 				{
 				yy_current_state = (int) yy_def[yy_current_state];
-				if ( yy_current_state >= 22 )
+				if ( yy_current_state >= 23 )
 					yy_c = yy_meta[(unsigned int) yy_c];
 				}
 			yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
 			++yy_cp;
 			}
-		while ( yy_base[yy_current_state] != 25 );
+		while ( yy_base[yy_current_state] != 28 );
 
 yy_find_action:
 		yy_act = yy_accept[yy_current_state];
@@ -896,6 +896,13 @@ case 7:
 YY_RULE_SETUP
 #line 107 "hex_lexer.l"
 {
+  return yytext[0];
+}
+	YY_BREAK
+case 8:
+YY_RULE_SETUP
+#line 111 "hex_lexer.l"
+{
 
   yylval->integer = atoi(yytext);
 
@@ -908,24 +915,24 @@ YY_RULE_SETUP
   return _NUMBER_;
 }
 	YY_BREAK
-case 8:
+case 9:
 YY_RULE_SETUP
-#line 120 "hex_lexer.l"
+#line 124 "hex_lexer.l"
 {
 
   BEGIN(INITIAL);
   return yytext[0];
 }
 	YY_BREAK
-case 9:
-/* rule 9 can match eol */
+case 10:
+/* rule 10 can match eol */
 YY_RULE_SETUP
-#line 127 "hex_lexer.l"
+#line 131 "hex_lexer.l"
 // skip whitespace
 	YY_BREAK
-case 10:
+case 11:
 YY_RULE_SETUP
-#line 130 "hex_lexer.l"
+#line 134 "hex_lexer.l"
 {
 
   if (yytext[0] >= 32 && yytext[0] < 127)
@@ -939,12 +946,12 @@ YY_RULE_SETUP
   }
 }
 	YY_BREAK
-case 11:
+case 12:
 YY_RULE_SETUP
-#line 143 "hex_lexer.l"
+#line 147 "hex_lexer.l"
 ECHO;
 	YY_BREAK
-#line 948 "hex_lexer.c"
+#line 955 "hex_lexer.c"
 case YY_STATE_EOF(INITIAL):
 case YY_STATE_EOF(range):
 	yyterminate();
@@ -1239,7 +1246,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
 		while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 			{
 			yy_current_state = (int) yy_def[yy_current_state];
-			if ( yy_current_state >= 22 )
+			if ( yy_current_state >= 23 )
 				yy_c = yy_meta[(unsigned int) yy_c];
 			}
 		yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -1268,11 +1275,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
 	while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 		{
 		yy_current_state = (int) yy_def[yy_current_state];
-		if ( yy_current_state >= 22 )
+		if ( yy_current_state >= 23 )
 			yy_c = yy_meta[(unsigned int) yy_c];
 		}
 	yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
-	yy_is_jam = (yy_current_state == 21);
+	yy_is_jam = (yy_current_state == 22);
 
 	return yy_is_jam ? 0 : yy_current_state;
 }
@@ -2080,7 +2087,7 @@ void hex_yyfree (void * ptr , yyscan_t yyscanner)
 
 #define YYTABLES_NAME "yytables"
 
-#line 143 "hex_lexer.l"
+#line 147 "hex_lexer.l"
 
 
 
@@ -2127,21 +2134,17 @@ int yr_parse_hex_string(
   LEX_ENVIRONMENT lex_env;
 
   lex_env.last_error_message = NULL;
+  lex_env.inside_or = 0;
 
   FAIL_ON_ERROR(yr_re_create(re));
 
-  // The RE_FLAGS_LITERAL_STRING flag indicates that the regular expression
-  // is just a literal string and it can be matched by doing a simple string
-  // comparison, without executing any regular expression code.
-  //
   // The RE_FLAGS_FAST_HEX_REGEXP flag indicates a regular expression derived
   // from a hex string that can be matched by faster algorithm. These regular
   // expressions come from hex strings not contaning alternatives
   // (like in 01 02 | 03 04).
   //
-  // These flags are unset later during parsing if necessary.
+  // This flag is unset later during parsing if necessary.
 
-  (*re)->flags |= RE_FLAGS_LITERAL_STRING;
   (*re)->flags |= RE_FLAGS_FAST_HEX_REGEXP;
 
   #ifdef WIN32
diff --git a/libyara/hex_lexer.h b/libyara/hex_lexer.h
index eaeda59..24fd60a 100644
--- a/libyara/hex_lexer.h
+++ b/libyara/hex_lexer.h
@@ -39,6 +39,7 @@ typedef void* yyscan_t;
 
 typedef struct _LEX_ENVIRONMENT
 {
+  int inside_or;
   const char* last_error_message;
 
 } LEX_ENVIRONMENT;
diff --git a/libyara/hex_lexer.l b/libyara/hex_lexer.l
index e8247b8..c9e1df0 100644
--- a/libyara/hex_lexer.l
+++ b/libyara/hex_lexer.l
@@ -104,6 +104,10 @@ hexdigit      [a-fA-F0-9]
   return yytext[0];
 }
 
+<range>\. {
+  return yytext[0];
+}
+
 <range>{digit}+ {
 
   yylval->integer = atoi(yytext);
@@ -185,21 +189,17 @@ int yr_parse_hex_string(
   LEX_ENVIRONMENT lex_env;
 
   lex_env.last_error_message = NULL;
+  lex_env.inside_or = 0;
 
   FAIL_ON_ERROR(yr_re_create(re));
 
-  // The RE_FLAGS_LITERAL_STRING flag indicates that the regular expression
-  // is just a literal string and it can be matched by doing a simple string
-  // comparison, without executing any regular expression code.
-  //
   // The RE_FLAGS_FAST_HEX_REGEXP flag indicates a regular expression derived
   // from a hex string that can be matched by faster algorithm. These regular
   // expressions come from hex strings not contaning alternatives
   // (like in 01 02 | 03 04).
   //
-  // These flags are unset later during parsing if necessary.
+  // This flag is unset later during parsing if necessary.
 
-  (*re)->flags |= RE_FLAGS_LITERAL_STRING;
   (*re)->flags |= RE_FLAGS_FAST_HEX_REGEXP;
 
   #ifdef WIN32
diff --git a/libyara/lexer.c b/libyara/lexer.c
index 919735a..2776d94 100644
--- a/libyara/lexer.c
+++ b/libyara/lexer.c
@@ -406,7 +406,7 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    4,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    2,    5,    6,    7,    8,    1,    1,    1,    9,
-        9,   10,    1,    1,    9,    1,   11,   12,   13,   14,
+        9,   10,    1,    1,    9,    9,   11,   12,   13,   14,
        15,   16,   16,   17,   16,   18,   16,    1,    1,   19,
        20,   21,    9,   22,   23,   24,   23,   23,   23,   23,
        25,   25,   25,   25,   26,   25,   27,   25,   25,   25,
diff --git a/libyara/lexer.l b/libyara/lexer.l
index 15ef042..66d1c79 100644
--- a/libyara/lexer.l
+++ b/libyara/lexer.l
@@ -477,7 +477,7 @@ $({letter}|{digit}|_)*  {
 }
 
 
-\{({hexdigit}|[ \-|\?\[\]\(\)\n\t])+\}  {
+\{({hexdigit}|[ \-|\?\[\]\(\)\.\n\t])+\}  {
 
   int len = strlen(yytext);
   SIZED_STRING* s = (SIZED_STRING*) yr_malloc(len + sizeof(SIZED_STRING));
diff --git a/libyara/parser.c b/libyara/parser.c
index a621604..6d04886 100644
--- a/libyara/parser.c
+++ b/libyara/parser.c
@@ -198,47 +198,192 @@ YR_EXTERNAL_VARIABLE* yr_parser_lookup_external_variable(
 }
 
 
-YR_STRING* yr_parser_reduce_string_declaration(
-    yyscan_t yyscanner,
-    int32_t flags,
+int _yr_parser_write_string(
     const char* identifier,
-    SIZED_STRING* str)
+    int flags,
+    YR_COMPILER* compiler,
+    SIZED_STRING* str,
+    RE* re,
+    YR_STRING** string,
+    int* min_atom_length)
 {
-  int min_atom_length;
-  char* file_name;
-  char message[512];
-
-  YR_STRING* string;
+  SIZED_STRING* literal_string;
   YR_AC_MATCH* new_match;
+
   YR_ATOM_LIST_ITEM* atom;
   YR_ATOM_LIST_ITEM* atom_list = NULL;
-  RE* re = NULL;
 
-  uint8_t* literal_string = NULL;
-
-  int literal_string_len = 0;
+  int result;
   int max_string_len;
+  int free_literal = FALSE;
 
-  YR_COMPILER* compiler = yyget_extra(yyscanner);
+  *string = NULL;
 
-  compiler->last_result = yr_arena_allocate_struct(
+  result = yr_arena_allocate_struct(
       compiler->strings_arena,
       sizeof(YR_STRING),
-      (void**) &string,
+      (void**) string,
       offsetof(YR_STRING, identifier),
       offsetof(YR_STRING, string),
+      offsetof(YR_STRING, chained_to),
       EOL);
 
-  if (compiler->last_result != ERROR_SUCCESS)
-    return NULL;
+  if (result != ERROR_SUCCESS)
+    return result;
 
-  compiler->last_result = yr_arena_write_string(
+  result = yr_arena_write_string(
       compiler->sz_arena,
       identifier,
-      &string->identifier);
+      &(*string)->identifier);
 
-  if (compiler->last_result != ERROR_SUCCESS)
-    return NULL;
+  if (result != ERROR_SUCCESS)
+    return result;
+
+  if (flags & STRING_GFLAGS_HEXADECIMAL ||
+      flags & STRING_GFLAGS_REGEXP)
+  {
+    literal_string = yr_re_extract_literal(re);
+
+    if (literal_string != NULL)
+    {
+      flags |= STRING_GFLAGS_LITERAL;
+      free_literal = TRUE;
+    }
+  }
+  else
+  {
+    literal_string = str;
+    flags |= STRING_GFLAGS_LITERAL;
+  }
+
+  (*string)->g_flags = flags;
+  (*string)->chained_to = NULL;
+
+  memset((*string)->matches, 0,
+         sizeof((*string)->matches));
+
+  memset((*string)->unconfirmed_matches, 0,
+         sizeof((*string)->unconfirmed_matches));
+
+  if (flags & STRING_GFLAGS_LITERAL)
+  {
+    (*string)->length = literal_string->length;
+
+    result = yr_arena_write_data(
+        compiler->sz_arena,
+        literal_string->c_string,
+        literal_string->length,
+        (void*) &(*string)->string);
+
+    if (result == ERROR_SUCCESS)
+    {
+      result = yr_atoms_extract_from_string(
+          (uint8_t*) literal_string->c_string,
+          literal_string->length,
+          flags,
+          &atom_list);
+    }
+  }
+  else
+  {
+    result = yr_re_emit_code(re, compiler->re_code_arena);
+
+    if (result == ERROR_SUCCESS)
+      result = yr_atoms_extract_from_re(re, flags, &atom_list);
+  }
+
+  if (result == ERROR_SUCCESS)
+  {
+    // Add the string to Aho-Corasick automaton.
+
+    if (atom_list != NULL)
+    {
+      result = yr_ac_add_string(
+          compiler->automaton_arena,
+          compiler->automaton,
+          *string,
+          atom_list);
+    }
+    else
+    {
+      result = yr_arena_allocate_struct(
+          compiler->automaton_arena,
+          sizeof(YR_AC_MATCH),
+          (void**) &new_match,
+          offsetof(YR_AC_MATCH, string),
+          offsetof(YR_AC_MATCH, forward_code),
+          offsetof(YR_AC_MATCH, backward_code),
+          offsetof(YR_AC_MATCH, next),
+          EOL);
+
+      if (result == ERROR_SUCCESS)
+      {
+        new_match->backtrack = 0;
+        new_match->string = *string;
+        new_match->forward_code = re->root_node->forward_code;
+        new_match->backward_code = NULL;
+        new_match->next = compiler->automaton->root->matches;
+        compiler->automaton->root->matches = new_match;
+      }
+    }
+  }
+
+  atom = atom_list;
+
+  if (atom != NULL)
+    *min_atom_length = MAX_ATOM_LENGTH;
+  else
+    *min_atom_length = 0;
+
+  while (atom != NULL)
+  {
+    if (atom->atom_length < *min_atom_length)
+      *min_atom_length = atom->atom_length;
+    atom = atom->next;
+  }
+
+  if (flags & STRING_GFLAGS_LITERAL)
+  {
+    if (flags & STRING_GFLAGS_WIDE)
+      max_string_len = (*string)->length * 2;
+    else
+      max_string_len = (*string)->length;
+
+    if (max_string_len == *min_atom_length)
+      (*string)->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
+  }
+
+  if (free_literal)
+    yr_free(literal_string);
+
+  if (atom_list != NULL)
+    yr_atoms_list_destroy(atom_list);
+
+  return result;
+}
+
+#include <stdint.h>
+#include <limits.h>
+
+
+YR_STRING* yr_parser_reduce_string_declaration(
+    yyscan_t yyscanner,
+    int32_t flags,
+    const char* identifier,
+    SIZED_STRING* str)
+{
+  int min_atom_length;
+  int min_atom_length_aux;
+
+  char* file_name;
+  char message[512];
+
+  YR_COMPILER* compiler = yyget_extra(yyscanner);
+  YR_STRING* string = NULL;
+  YR_STRING* prev_string;
+
+  RE* re = NULL;
+  RE* remainder_re;
 
   if (strcmp(identifier,"$") == 0)
     flags |= STRING_GFLAGS_ANONYMOUS;
@@ -257,10 +402,6 @@ YR_STRING* yr_parser_reduce_string_declaration(
 
   flags |= STRING_GFLAGS_SINGLE_MATCH;
 
-  string->g_flags = flags;
-
-  memset(string->matches, 0, sizeof(string->matches));
-
   if (flags & STRING_GFLAGS_HEXADECIMAL ||
       flags & STRING_GFLAGS_REGEXP)
   {
@@ -282,127 +423,84 @@ YR_STRING* yr_parser_reduce_string_declaration(
           identifier,
           re->error_message);
 
-      yr_compiler_set_error_extra_info(compiler, message);
-      string = NULL;
+      yr_compiler_set_error_extra_info(
+          compiler, message);
+
       goto _exit;
     }
 
     if (re->flags & RE_FLAGS_FAST_HEX_REGEXP)
-      string->g_flags |= STRING_GFLAGS_FAST_HEX_REGEXP;
+      flags |= STRING_GFLAGS_FAST_HEX_REGEXP;
 
-    if (re->flags & RE_FLAGS_LITERAL_STRING)
-    {
-      string->g_flags |= STRING_GFLAGS_LITERAL;
-      literal_string = re->literal_string;
-      literal_string_len = re->literal_string_len;
+    compiler->last_result = yr_re_split_at_chaining_point(
+        re, &re, &remainder_re);
 
-      compiler->last_result = yr_atoms_extract_from_string(
-          literal_string, literal_string_len, string->g_flags, &atom_list);
-    }
-    else
-    {
-      compiler->last_result = yr_re_emit_code(
-          re, compiler->re_code_arena);
-
-      if (compiler->last_result != ERROR_SUCCESS)
-      {
-        string = NULL;
-        goto _exit;
-      }
-
-      compiler->last_result = yr_atoms_extract_from_re(
-          re, string->g_flags, &atom_list);
-    }
-  }
-  else
-  {
-    string->g_flags |= STRING_GFLAGS_LITERAL;
-    literal_string = (uint8_t*) str->c_string;
-    literal_string_len = str->length;
+    if (compiler->last_result != ERROR_SUCCESS)
+      goto _exit;
 
-    compiler->last_result  = yr_atoms_extract_from_string(
-        literal_string, literal_string_len, string->g_flags, &atom_list);
-  }
+    compiler->last_result = _yr_parser_write_string(
+        identifier,
+        flags,
+        compiler,
+        NULL,
+        re,
+        &string,
+        &min_atom_length);
 
-  if (compiler->last_result != ERROR_SUCCESS)
-  {
-    string = NULL;
-    goto _exit;
-  }
+    if (compiler->last_result != ERROR_SUCCESS)
+      goto _exit;
 
-  if (STRING_IS_LITERAL(string))
-  {
-    compiler->last_result = yr_arena_write_data(
-        compiler->sz_arena,
-        literal_string,
-        literal_string_len,
-        (void*) &string->string);
+    if (remainder_re != NULL)
+      string->g_flags |= STRING_GFLAGS_CHAIN_TAIL |
+                         STRING_GFLAGS_CHAIN_PART;
 
-    if (compiler->last_result != ERROR_SUCCESS)
+    while (remainder_re != NULL)
     {
-      string = NULL;
-      goto _exit;
-    }
+      // Destroy the regexp pointed to by 're' before
+      // yr_re_split_at_chaining_point overwrites 're' with another value.
 
-    string->length = literal_string_len;
-  }
+      yr_re_destroy(re);
 
-  // Add the string to Aho-Corasick automaton.
+      compiler->last_result = yr_re_split_at_chaining_point(
+          remainder_re, &re, &remainder_re);
 
-  if (atom_list != NULL)
-  {
-    compiler->last_result = yr_ac_add_string(
-      compiler->automaton_arena,
-      compiler->automaton,
-      string,
-      atom_list);
-  }
-  else
-  {
-    compiler->last_result = yr_arena_allocate_struct(
-        compiler->automaton_arena,
-        sizeof(YR_AC_MATCH),
-        (void**) &new_match,
-        offsetof(YR_AC_MATCH, string),
-        offsetof(YR_AC_MATCH, forward_code),
-        offsetof(YR_AC_MATCH, backward_code),
-        offsetof(YR_AC_MATCH, next),
-        EOL);
+      if (compiler->last_result != ERROR_SUCCESS)
+        goto _exit;
 
-    if (compiler->last_result == ERROR_SUCCESS)
-    {
-      new_match->backtrack = 0;
-      new_match->string = string;
-      new_match->forward_code = re->root_node->forward_code;
-      new_match->backward_code = NULL;
-      new_match->next = compiler->automaton->root->matches;
-      compiler->automaton->root->matches = new_match;
-    }
-  }
+      prev_string = string;
 
-  atom = atom_list;
+      compiler->last_result = _yr_parser_write_string(
+          identifier,
+          flags,
+          compiler,
+          NULL,
+          re,
+          &string,
+          &min_atom_length_aux);
 
-  if (atom != NULL)
-    min_atom_length = MAX_ATOM_LENGTH;
-  else
-    min_atom_length = 0;
+      if (compiler->last_result != ERROR_SUCCESS)
+        goto _exit;
 
-  while (atom != NULL)
-  {
-    if (atom->atom_length < min_atom_length)
-      min_atom_length = atom->atom_length;
-    atom = atom->next;
-  }
+      if (min_atom_length_aux < min_atom_length)
+        min_atom_length = min_atom_length_aux;
 
-  if (STRING_IS_LITERAL(string))
+      string->g_flags |= STRING_GFLAGS_CHAIN_PART;
+      prev_string->chained_to = string;
+    }
+  }
+  else
   {
-    if (STRING_IS_WIDE(string))
-      max_string_len = string->length * 2;
-    else
-      max_string_len = string->length;
+    compiler->last_result = _yr_parser_write_string(
+        identifier,
+        flags,
+        compiler,
+        str,
+        NULL,
+        &string,
+        &min_atom_length);
 
-    if (max_string_len == min_atom_length)
-      string->g_flags |= STRING_GFLAGS_FITS_IN_ATOM;
+    if (compiler->last_result != ERROR_SUCCESS)
+      goto _exit;
   }
 
   if (compiler->file_name_stack_ptr > 0)
@@ -426,17 +524,14 @@ YR_STRING* yr_parser_reduce_string_declaration(
         message);
   }
 
-  if (compiler->last_result != ERROR_SUCCESS)
-    string = NULL;
-
 _exit:
 
-  if (atom_list != NULL)
-    yr_atoms_list_destroy(atom_list);
-
   if (re != NULL)
     yr_re_destroy(re);
 
+  if (compiler->last_result != ERROR_SUCCESS)
+    return NULL;
+
   return string;
 }
 
diff --git a/libyara/re.c b/libyara/re.c
index 3a2b5f8..f6e69b9 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -28,6 +28,7 @@ order to avoid confusion with operating system threads.
 #include <assert.h>
 #include <ctype.h>
 #include <string.h>
+#include <limits.h>
 
 #ifdef WIN32
 #include <windows.h>
@@ -215,16 +216,6 @@ int yr_re_create(
   if (*re == NULL)
     return ERROR_INSUFICIENT_MEMORY;
 
-  (*re)->literal_string_len = 0;
-  (*re)->literal_string_max = 128;
-  (*re)->literal_string = yr_malloc(128);
-
-  if ((*re)->literal_string == NULL)
-  {
-    yr_free(*re);
-    return ERROR_INSUFICIENT_MEMORY;
-  }
-
   (*re)->flags = 0;
   (*re)->root_node = NULL;
   (*re)->error_message = NULL;
@@ -243,9 +234,6 @@ void yr_re_destroy(
   if (re->error_message != NULL)
     yr_free((char*) re->error_message);
 
-  if (re->literal_string != NULL)
-    yr_free(re->literal_string);
-
   yr_free(re);
 }
 
@@ -265,6 +253,123 @@ int yr_re_compile_hex(
   return yr_parse_hex_string(hex_string, re);
 }
 
+//
+// yr_re_extract_literal
+//
+// Verifies if the provided regular expression is just a literal string
+// like "abc", "12345", without any wildcard, operator, etc. In that case
+// returns the string as a SIZED_STRING. Returns NULL if the expression is
+// not a plain literal, has no root node, or memory can not be allocated.
+//
+// The caller is responsible for deallocating the returned SIZED_STRING by
+// calling yr_free.
+//
+
+SIZED_STRING* yr_re_extract_literal(
+    RE* re)
+{
+  SIZED_STRING* string;
+  RE_NODE* node = re->root_node;
+
+  int i, length = 1;
+  char tmp;
+
+  // A literal is a chain of CONCAT nodes whose right children are LITERAL
+  // nodes, ending at a LITERAL on the left, which 'length' counts upfront.
+
+  while (node != NULL && node->type == RE_NODE_CONCAT)
+  {
+    if (node->right == NULL ||
+        node->right->type != RE_NODE_LITERAL)
+      return NULL;
+
+    length++;
+    node = node->left;
+  }
+
+  if (node == NULL || node->type != RE_NODE_LITERAL)
+    return NULL;
+
+  string = yr_malloc(sizeof(SIZED_STRING) + length);
+
+  if (string == NULL)  // must be checked before writing through 'string'
+    return NULL;
+
+  string->length = 0;
+  node = re->root_node;
+
+  while (node->type == RE_NODE_CONCAT)
+  {
+    string->c_string[string->length++] = node->right->value;
+    node = node->left;
+  }
+
+  string->c_string[string->length++] = node->value;
+
+  // The string ends up reversed. Reverse it back to its original value.
+
+  for (i = 0; i < length / 2; i++)
+  {
+    tmp = string->c_string[i];
+    string->c_string[i] = string->c_string[length - i - 1];
+    string->c_string[length - i - 1] = tmp;
+  }
+
+  return string;
+}
+
+// Splits 're' in two at a non-greedy ANY range of 0..INT_MAX, if it has one.
+int yr_re_split_at_chaining_point(
+    RE* re,             // expression to split; its tree is modified in place
+    RE** result_re,     // always receives 're' itself
+    RE** remainder_re)  // receives a newly created RE on a split, else NULL
+{
+  RE_NODE* node = re->root_node;         // parent of 'child'
+  RE_NODE* child = re->root_node->left;  // node being inspected
+  RE_NODE* parent = NULL;                // parent of 'node'; NULL at the root
+
+  int result;  // status of yr_re_create, returned to the caller on failure
+
+  *result_re = re;
+  *remainder_re = NULL;
+  // Follow left children for as long as they are CONCAT nodes.
+  while (child != NULL && child->type == RE_NODE_CONCAT)
+  {
+    if (child->right != NULL &&
+        child->right->type == RE_NODE_RANGE &&
+        child->right->greedy == FALSE &&
+        child->right->start == 0 &&
+        child->right->end == INT_MAX &&
+        child->right->left->type == RE_NODE_ANY)
+    {
+      result = yr_re_create(remainder_re);
+
+      if (result != ERROR_SUCCESS)
+        return result;
+      // The new RE takes over everything to the left of the range node.
+      (*remainder_re)->root_node = child->left;
+
+      child->left = NULL;
+      // In the original tree 'node' is replaced by its own right child.
+      if (parent != NULL)
+        parent->left = node->right;
+      else
+        (*result_re)->root_node = node->right;
+      // Unlink first, presumably so yr_re_node_destroy(node) can't free it.
+      node->right = NULL;
+      yr_re_node_destroy(node);
+
+      return ERROR_SUCCESS;
+    }
+
+    parent = node;
+    node = child;
+    child = child->left;
+  }
+  // No split point found: 're' is left as is and *remainder_re stays NULL.
+  return ERROR_SUCCESS;
+}
+
 
 int _yr_emit_inst(
     YR_ARENA* arena,
diff --git a/libyara/re.h b/libyara/re.h
index 3e12cd2..f1a0dae 100644
--- a/libyara/re.h
+++ b/libyara/re.h
@@ -18,6 +18,7 @@ limitations under the License.
 #define _RE_H
 
 #include "yara.h"
+#include "sizedstr.h"
 
 #define RE_NODE_LITERAL             1
 #define RE_NODE_MASKED_LITERAL      2
@@ -61,7 +62,7 @@ limitations under the License.
 #define RE_OPCODE_JNZ               0xB4
 #define RE_OPCODE_JUMP              0xB5
 
-#define RE_FLAGS_LITERAL_STRING           0x01
+
 #define RE_FLAGS_FAST_HEX_REGEXP          0x02
 #define RE_FLAGS_BACKWARDS                0x04
 #define RE_FLAGS_EXHAUSTIVE               0x08
@@ -113,11 +114,6 @@ struct RE {
 
   const char* error_message;
   int error_code;
-
-  uint8_t* literal_string;
-
-  int literal_string_len;
-  int literal_string_max;
 };
 
 
@@ -166,10 +162,21 @@ void yr_re_node_destroy(
   RE_NODE* node);
 
 
+SIZED_STRING* yr_re_extract_literal(
+    RE* re);
+
+
+int yr_re_split_at_chaining_point(
+    RE* re,
+    RE** result_re,
+    RE** remainder_re);
+
+
 int yr_re_emit_code(
     RE* re,
     YR_ARENA* arena);
 
+
 int yr_re_exec(
     uint8_t* code,
     uint8_t* input,
diff --git a/libyara/re_grammar.c b/libyara/re_grammar.c
index 244f18c..29f9778 100644
--- a/libyara/re_grammar.c
+++ b/libyara/re_grammar.c
@@ -126,11 +126,6 @@
 yydebug = 1;
 #endif
 
-
-#define mark_as_not_literal() \
-    ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
-
-
 #define ERROR_IF(x, error) \
     if (x) \
     { \
@@ -167,7 +162,7 @@ yydebug = 1;
 
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 typedef union YYSTYPE
-#line 72 "re_grammar.y"
+#line 67 "re_grammar.y"
 {
   int integer;
   uint32_t range;
@@ -175,7 +170,7 @@ typedef union YYSTYPE
   uint8_t* class_vector;
 }
 /* Line 193 of yacc.c.  */
-#line 179 "re_grammar.c"
+#line 174 "re_grammar.c"
 	YYSTYPE;
 # define yystype YYSTYPE /* obsolescent; will be withdrawn */
 # define YYSTYPE_IS_DECLARED 1
@@ -188,7 +183,7 @@ typedef union YYSTYPE
 
 
 /* Line 216 of yacc.c.  */
-#line 192 "re_grammar.c"
+#line 187 "re_grammar.c"
 
 #ifdef short
 # undef short
@@ -479,9 +474,9 @@ static const yytype_int8 yyrhs[] =
 /* YYRLINE[YYN] -- source line where rule number YYN was defined.  */
 static const yytype_uint16 yyrline[] =
 {
-       0,   101,   101,   106,   109,   113,   123,   139,   143,   153,
-     161,   171,   179,   189,   200,   212,   223,   227,   234,   243,
-     247,   254,   277,   284,   291,   298,   305,   312,   319
+       0,    96,    96,   101,   104,   108,   117,   132,   136,   146,
+     153,   162,   169,   178,   188,   199,   209,   213,   219,   227,
+     231,   237,   245,   251,   257,   263,   269,   275,   281
 };
 #endif
 
@@ -1110,29 +1105,29 @@ yydestruct (yymsg, yytype, yyvaluep, yyscanner, lex_env)
   switch (yytype)
     {
       case 6: /* "_CLASS_" */
-#line 93 "re_grammar.y"
+#line 88 "re_grammar.y"
 	{ yr_free((yyvaluep->class_vector)); };
-#line 1116 "re_grammar.c"
+#line 1111 "re_grammar.c"
 	break;
       case 24: /* "alternative" */
-#line 94 "re_grammar.y"
+#line 89 "re_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1121 "re_grammar.c"
+#line 1116 "re_grammar.c"
 	break;
       case 25: /* "concatenation" */
-#line 95 "re_grammar.y"
+#line 90 "re_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1126 "re_grammar.c"
+#line 1121 "re_grammar.c"
 	break;
       case 26: /* "repeat" */
-#line 96 "re_grammar.y"
+#line 91 "re_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1131 "re_grammar.c"
+#line 1126 "re_grammar.c"
 	break;
       case 27: /* "single" */
-#line 97 "re_grammar.y"
+#line 92 "re_grammar.y"
 	{ yr_re_node_destroy((yyvaluep->re_node)); };
-#line 1136 "re_grammar.c"
+#line 1131 "re_grammar.c"
 	break;
 
       default:
@@ -1442,7 +1437,7 @@ yyreduce:
   switch (yyn)
     {
         case 2:
-#line 102 "re_grammar.y"
+#line 97 "re_grammar.y"
     {
         RE* re = yyget_extra(yyscanner);
         re->root_node = (yyvsp[(1) - (1)].re_node);
@@ -1450,16 +1445,15 @@ yyreduce:
     break;
 
   case 4:
-#line 110 "re_grammar.y"
+#line 105 "re_grammar.y"
     {
                 (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
               }
     break;
 
   case 5:
-#line 114 "re_grammar.y"
+#line 109 "re_grammar.y"
     {
-                mark_as_not_literal();
                 (yyval.re_node) = yr_re_node_create(RE_NODE_ALT, (yyvsp[(1) - (3)].re_node), (yyvsp[(3) - (3)].re_node));
 
                 DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (3)].re_node));
@@ -1470,11 +1464,10 @@ yyreduce:
     break;
 
   case 6:
-#line 124 "re_grammar.y"
+#line 118 "re_grammar.y"
     {
                 RE_NODE* node;
 
-                mark_as_not_literal();
                 node = yr_re_node_create(RE_NODE_EMPTY, NULL, NULL);
 
                 DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
@@ -1487,14 +1480,14 @@ yyreduce:
     break;
 
   case 7:
-#line 140 "re_grammar.y"
+#line 133 "re_grammar.y"
     {
                   (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
                 }
     break;
 
   case 8:
-#line 144 "re_grammar.y"
+#line 137 "re_grammar.y"
     {
                   (yyval.re_node) = yr_re_node_create(RE_NODE_CONCAT, (yyvsp[(1) - (2)].re_node), (yyvsp[(2) - (2)].re_node));
 
@@ -1505,9 +1498,8 @@ yyreduce:
     break;
 
   case 9:
-#line 154 "re_grammar.y"
+#line 147 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_STAR, (yyvsp[(1) - (2)].re_node), NULL);
 
             DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
@@ -1516,9 +1508,8 @@ yyreduce:
     break;
 
   case 10:
-#line 162 "re_grammar.y"
+#line 154 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_STAR, (yyvsp[(1) - (3)].re_node), NULL);
 
             DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (3)].re_node));
@@ -1529,9 +1520,8 @@ yyreduce:
     break;
 
   case 11:
-#line 172 "re_grammar.y"
+#line 163 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_PLUS, (yyvsp[(1) - (2)].re_node), NULL);
 
             DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
@@ -1540,9 +1530,8 @@ yyreduce:
     break;
 
   case 12:
-#line 180 "re_grammar.y"
+#line 170 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_PLUS, (yyvsp[(1) - (3)].re_node), NULL);
 
             DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (3)].re_node));
@@ -1553,9 +1542,8 @@ yyreduce:
     break;
 
   case 13:
-#line 190 "re_grammar.y"
+#line 179 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (2)].re_node), NULL);
 
             DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
@@ -1567,9 +1555,8 @@ yyreduce:
     break;
 
   case 14:
-#line 201 "re_grammar.y"
+#line 189 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (3)].re_node), NULL);
 
             DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (3)].re_node));
@@ -1582,9 +1569,8 @@ yyreduce:
     break;
 
   case 15:
-#line 213 "re_grammar.y"
+#line 200 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_RANGE, (yyvsp[(1) - (2)].re_node), NULL);
 
             DESTROY_NODE_IF((yyval.re_node) == NULL, (yyvsp[(1) - (2)].re_node));
@@ -1596,16 +1582,15 @@ yyreduce:
     break;
 
   case 16:
-#line 224 "re_grammar.y"
+#line 210 "re_grammar.y"
     {
             (yyval.re_node) = (yyvsp[(1) - (1)].re_node);
          }
     break;
 
   case 17:
-#line 228 "re_grammar.y"
+#line 214 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_START, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1613,9 +1598,8 @@ yyreduce:
     break;
 
   case 18:
-#line 235 "re_grammar.y"
+#line 220 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_ANCHOR_END, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1623,16 +1607,15 @@ yyreduce:
     break;
 
   case 19:
-#line 244 "re_grammar.y"
+#line 228 "re_grammar.y"
     {
             (yyval.re_node) = (yyvsp[(2) - (3)].re_node);
          }
     break;
 
   case 20:
-#line 248 "re_grammar.y"
+#line 232 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1640,35 +1623,19 @@ yyreduce:
     break;
 
   case 21:
-#line 255 "re_grammar.y"
+#line 238 "re_grammar.y"
     {
-            RE* re = yyget_extra(yyscanner);
-
             (yyval.re_node) = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
 
             (yyval.re_node)->value = (yyvsp[(1) - (1)].integer);
-
-            if (re->literal_string_len == re->literal_string_max)
-            {
-              re->literal_string_max *= 2;
-              re->literal_string = yr_realloc(
-                  re->literal_string,
-                  re->literal_string_max);
-
-              ERROR_IF(re->literal_string == NULL, ERROR_INSUFICIENT_MEMORY);
-            }
-
-            re->literal_string[re->literal_string_len] = (yyvsp[(1) - (1)].integer);
-            re->literal_string_len++;
          }
     break;
 
   case 22:
-#line 278 "re_grammar.y"
+#line 246 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_WORD_CHAR, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1676,9 +1643,8 @@ yyreduce:
     break;
 
   case 23:
-#line 285 "re_grammar.y"
+#line 252 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_NON_WORD_CHAR, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1686,9 +1652,8 @@ yyreduce:
     break;
 
   case 24:
-#line 292 "re_grammar.y"
+#line 258 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_SPACE, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1696,9 +1661,8 @@ yyreduce:
     break;
 
   case 25:
-#line 299 "re_grammar.y"
+#line 264 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_NON_SPACE, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1706,9 +1670,8 @@ yyreduce:
     break;
 
   case 26:
-#line 306 "re_grammar.y"
+#line 270 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_DIGIT, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1716,9 +1679,8 @@ yyreduce:
     break;
 
   case 27:
-#line 313 "re_grammar.y"
+#line 276 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_NON_DIGIT, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1726,9 +1688,8 @@ yyreduce:
     break;
 
   case 28:
-#line 320 "re_grammar.y"
+#line 282 "re_grammar.y"
     {
-            mark_as_not_literal();
             (yyval.re_node) = yr_re_node_create(RE_NODE_CLASS, NULL, NULL);
 
             ERROR_IF((yyval.re_node) == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -1739,7 +1700,7 @@ yyreduce:
 
 
 /* Line 1267 of yacc.c.  */
-#line 1743 "re_grammar.c"
+#line 1704 "re_grammar.c"
       default: break;
     }
   YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -1953,7 +1914,7 @@ yyreturn:
 }
 
 
-#line 331 "re_grammar.y"
+#line 292 "re_grammar.y"
 
 
 
diff --git a/libyara/re_grammar.h b/libyara/re_grammar.h
index d4095f6..08e72c7 100644
--- a/libyara/re_grammar.h
+++ b/libyara/re_grammar.h
@@ -68,7 +68,7 @@
 
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 typedef union YYSTYPE
-#line 72 "re_grammar.y"
+#line 67 "re_grammar.y"
 {
   int integer;
   uint32_t range;
diff --git a/libyara/re_grammar.y b/libyara/re_grammar.y
index 268677e..5d8c59a 100644
--- a/libyara/re_grammar.y
+++ b/libyara/re_grammar.y
@@ -37,11 +37,6 @@ limitations under the License.
 yydebug = 1;
 #endif
 
-
-#define mark_as_not_literal() \
-    ((RE*) yyget_extra(yyscanner))->flags &= ~RE_FLAGS_LITERAL_STRING
-
-
 #define ERROR_IF(x, error) \
     if (x) \
     { \
@@ -112,7 +107,6 @@ alternative : concatenation
               }
             | alternative '|' concatenation
               {
-                mark_as_not_literal();
                 $$ = yr_re_node_create(RE_NODE_ALT, $1, $3);
 
                 DESTROY_NODE_IF($$ == NULL, $1);
@@ -124,7 +118,6 @@ alternative : concatenation
               {
                 RE_NODE* node;
 
-                mark_as_not_literal();
                 node = yr_re_node_create(RE_NODE_EMPTY, NULL, NULL);
 
                 DESTROY_NODE_IF($$ == NULL, $1);
@@ -152,7 +145,6 @@ concatenation : repeat
 
 repeat : single '*'
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_STAR, $1, NULL);
 
             DESTROY_NODE_IF($$ == NULL, $1);
@@ -160,7 +152,6 @@ repeat : single '*'
          }
        | single '*' '?'
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_STAR, $1, NULL);
 
             DESTROY_NODE_IF($$ == NULL, $1);
@@ -170,7 +161,6 @@ repeat : single '*'
          }
        | single '+'
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_PLUS, $1, NULL);
 
             DESTROY_NODE_IF($$ == NULL, $1);
@@ -178,7 +168,6 @@ repeat : single '*'
          }
        | single '+' '?'
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_PLUS, $1, NULL);
 
             DESTROY_NODE_IF($$ == NULL, $1);
@@ -188,7 +177,6 @@ repeat : single '*'
          }
        | single '?'
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_RANGE, $1, NULL);
 
             DESTROY_NODE_IF($$ == NULL, $1);
@@ -199,7 +187,6 @@ repeat : single '*'
          }
        | single '?' '?'
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_RANGE, $1, NULL);
 
             DESTROY_NODE_IF($$ == NULL, $1);
@@ -211,7 +198,6 @@ repeat : single '*'
          }
        | single _RANGE_
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_RANGE, $1, NULL);
 
             DESTROY_NODE_IF($$ == NULL, $1);
@@ -226,14 +212,12 @@ repeat : single '*'
          }
        | '^'
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_ANCHOR_START, NULL, NULL);
 
             ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
          }
        | '$'
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_ANCHOR_END, NULL, NULL);
 
             ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
@@ -246,79 +230,56 @@ single : '(' alternative ')'
          }
        | '.'
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_ANY, NULL, NULL);
 
             ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
          }
        | _CHAR_
          {
-            RE* re = yyget_extra(yyscanner);
-
             $$ = yr_re_node_create(RE_NODE_LITERAL, NULL, NULL);
 
             ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
 
             $$->value = $1;
-
-            if (re->literal_string_len == re->literal_string_max)
-            {
-              re->literal_string_max *= 2;
-              re->literal_string = yr_realloc(
-                  re->literal_string,
-                  re->literal_string_max);
-
-              ERROR_IF(re->literal_string == NULL, ERROR_INSUFICIENT_MEMORY);
-            }
-
-            re->literal_string[re->literal_string_len] = $1;
-            re->literal_string_len++;
          }
        | _WORD_CHAR_
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_WORD_CHAR, NULL, NULL);
 
             ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
          }
        | _NON_WORD_CHAR_
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_NON_WORD_CHAR, NULL, NULL);
 
             ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
          }
        | _SPACE_
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_SPACE, NULL, NULL);
 
             ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
          }
        | _NON_SPACE_
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_NON_SPACE, NULL, NULL);
 
             ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
          }
        | _DIGIT_
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_DIGIT, NULL, NULL);
 
             ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
          }
        | _NON_DIGIT_
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_NON_DIGIT, NULL, NULL);
 
             ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
          }
        | _CLASS_
          {
-            mark_as_not_literal();
             $$ = yr_re_node_create(RE_NODE_CLASS, NULL, NULL);
 
             ERROR_IF($$ == NULL, ERROR_INSUFICIENT_MEMORY);
diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c
index 6d6e1c4..9d861cb 100644
--- a/libyara/re_lexer.c
+++ b/libyara/re_lexer.c
@@ -2504,14 +2504,6 @@ int yr_parse_re_string(
 
   FAIL_ON_ERROR(yr_re_create(re));
 
-  // The RE_FLAGS_LITERAL_STRING flag indicates that the
-  // regular expression is just a literal string and it can
-  // be matched by doing a simple string comparison, without
-  // executing any regular expression code. We initially set
-  // this flag which is unset later during parsing if necessary.
-
-  (*re)->flags |= RE_FLAGS_LITERAL_STRING;
-
   #ifdef WIN32
   TlsSetValue(recovery_state_key, (LPVOID) &recovery_state);
   #else
diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l
index a504377..4f5867d 100644
--- a/libyara/re_lexer.l
+++ b/libyara/re_lexer.l
@@ -466,14 +466,6 @@ int yr_parse_re_string(
 
   FAIL_ON_ERROR(yr_re_create(re));
 
-  // The RE_FLAGS_LITERAL_STRING flag indicates that the
-  // regular expression is just a literal string and it can
-  // be matched by doing a simple string comparison, without
-  // executing any regular expression code. We initially set
-  // this flag which is unset later during parsing if necessary.
-
-  (*re)->flags |= RE_FLAGS_LITERAL_STRING;
-
   #ifdef WIN32
   TlsSetValue(recovery_state_key, (LPVOID) &recovery_state);
   #else
diff --git a/libyara/rules.c b/libyara/rules.c
index b0d6fdc..e12b76f 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -300,7 +300,60 @@ int _yr_scan_fast_hex_re_exec(
   return -1;
 }
 
-void match_callback(
+void _yr_scan_confirm_matches(  // confirms pending matches of string->chained_to
+    int tidx,             // index into the matches[] / unconfirmed_matches[] arrays
+    YR_STRING* string,    // string for which a match was just found
+    size_t match_offset,  // offset of that match
+    int match_length)     // length of that match
+{
+  YR_MATCH* match;
+  YR_MATCH* next_match;
+  // Nothing to confirm if this string is not chained to another one.
+  if (string->chained_to == NULL)
+    return;
+  // Walk the still-unconfirmed matches of the string this one is chained to.
+  match = string->chained_to->unconfirmed_matches[tidx].head;
+
+  while (match != NULL)
+  {
+    next_match = match->next;  // saved now: match->next is overwritten below
+    // Only pending matches that end at or before match_offset get confirmed.
+    if (match_offset >= match->first_offset + match->length)
+    {  // unlink match from the unconfirmed list
+      if (match->prev != NULL)
+        match->prev->next = match->next;
+
+      if (match->next != NULL)
+        match->next->prev = match->prev;
+      // Fix up the unconfirmed list's head/tail if match sat at either end.
+      if (match == string->chained_to->unconfirmed_matches[tidx].head)
+        string->chained_to->unconfirmed_matches[tidx].head = match->next;
+
+      if (match == string->chained_to->unconfirmed_matches[tidx].tail)
+        string->chained_to->unconfirmed_matches[tidx].tail = match->prev;
+      // Re-link match at the tail of the confirmed matches list.
+      match->prev = string->chained_to->matches[tidx].tail;
+      match->next = NULL;
+      match->length = match_offset - match->first_offset + match_length;  // now runs to the end of the new match
+
+      if (string->chained_to->matches[tidx].head == NULL)
+        string->chained_to->matches[tidx].head = match;
+
+      if (string->chained_to->matches[tidx].tail != NULL)
+        string->chained_to->matches[tidx].tail->next = match;
+
+      string->chained_to->matches[tidx].tail = match;
+      // chained_to now has a confirmed match: repeat for whatever it is chained to.
+      _yr_scan_confirm_matches(
+          tidx, string->chained_to, match->first_offset, match->length);
+    }
+
+    match = next_match;
+  }
+}
+
+
+void _yr_rules_match_callback(
     uint8_t* match_data,
     int match_length,
     int flags,
@@ -308,6 +361,7 @@ void match_callback(
 {
   YR_MATCH* new_match;
   YR_MATCH* match;
+  YR_MATCHES* matches;
 
   CALLBACK_ARGS* callback_args = args;
   YR_STRING* string = callback_args->string;
@@ -358,7 +412,18 @@ void match_callback(
     }
   }
 
-  match = string->matches[tidx].tail;
+  if (STRING_IS_CHAIN_TAIL(string))
+  {
+    _yr_scan_confirm_matches(tidx, string, match_offset, match_length);
+    return;
+  }
+
+  if (STRING_IS_CHAIN_PART(string))
+    matches = &string->unconfirmed_matches[tidx];
+  else
+    matches = &string->matches[tidx];
+
+  match = matches->tail;
 
   while (match != NULL)
   {
@@ -397,34 +462,29 @@ void match_callback(
   new_match->first_offset = match_offset;
   new_match->last_offset = match_offset;
   new_match->length = match_length;
+  new_match->data = match_data;
 
   if (match != NULL)
   {
     new_match->next = match->next;
+    new_match->prev = match;
     match->next = new_match;
   }
   else
   {
-    new_match->next = string->matches[tidx].head;
-    string->matches[tidx].head = new_match;
+    new_match->next = matches->head;
+    matches->head = new_match;
   }
 
   if (new_match->next != NULL)
     new_match->next->prev = new_match;
   else
-    string->matches[tidx].tail = new_match;
+    matches->tail = new_match;
 
   new_match->prev = match;
-  //TODO: handle errors
-  yr_arena_write_data(
-      callback_args->matches_arena,
-      match_data,
-      match_length,
-      (void**) &new_match->data);
 }
 
 
-
 typedef int (*RE_EXEC_FUNC)(
     uint8_t* code,
     uint8_t* input,
@@ -503,12 +563,12 @@ int _yr_scan_verify_re_match(
         data + offset,
         offset + 1,
         flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
-        match_callback,
+        _yr_rules_match_callback,
         (void*) &callback_args);
   }
   else
   {
-    match_callback(
+    _yr_rules_match_callback(
         data + offset, 0, flags, &callback_args);
   }
 
@@ -617,7 +677,7 @@ int _yr_scan_verify_literal_match(
     callback_args.full_word = STRING_IS_FULL_WORD(string);
     callback_args.tidx = yr_get_tidx();
 
-    match_callback(
+    _yr_rules_match_callback(
         data + offset, 0, flags, &callback_args);
   }
 
@@ -783,6 +843,8 @@ void _yr_rules_clean_matches(
     {
       string->matches[tidx].head = NULL;
       string->matches[tidx].tail = NULL;
+      string->unconfirmed_matches[tidx].head = NULL;
+      string->unconfirmed_matches[tidx].tail = NULL;
       string++;
     }
 
diff --git a/libyara/yara.h b/libyara/yara.h
index b0eec6c..3512495 100644
--- a/libyara/yara.h
+++ b/libyara/yara.h
@@ -56,40 +56,35 @@ typedef pthread_mutex_t mutex_t;
 #endif
 
 #define ERROR_INSUFICIENT_MEMORY                1
-#define ERROR_DUPLICATE_RULE_IDENTIFIER         2
-#define ERROR_INVALID_HEX_STRING                3
-#define ERROR_UNDEFINED_STRING                  4
-#define ERROR_UNDEFINED_IDENTIFIER              5
-#define ERROR_COULD_NOT_OPEN_FILE               6
-#define ERROR_INVALID_REGULAR_EXPRESSION        7
-#define ERROR_SYNTAX_ERROR                      8
-#define ERROR_DUPLICATE_TAG_IDENTIFIER          9
-#define ERROR_UNREFERENCED_STRING               10
-#define ERROR_DUPLICATE_STRING_IDENTIFIER       11
-#define ERROR_CALLBACK_ERROR                    12
-#define ERROR_MISPLACED_OR_OPERATOR             13
-#define ERROR_INVALID_OR_OPERATION_SYNTAX       14
-#define ERROR_SKIP_INSIDE_OR_OPERATION          15
-#define ERROR_NESTED_OR_OPERATION               16
-#define ERROR_MISPLACED_ANONYMOUS_STRING        17
-#define ERROR_COULD_NOT_MAP_FILE                18
-#define ERROR_ZERO_LENGTH_FILE                  19
-#define ERROR_INVALID_ARGUMENT                  20
-#define ERROR_DUPLICATE_META_IDENTIFIER         21
+#define ERROR_COULD_NOT_ATTACH_TO_PROCESS       2
+#define ERROR_COULD_NOT_OPEN_FILE               3
+#define ERROR_COULD_NOT_MAP_FILE                4
+#define ERROR_ZERO_LENGTH_FILE                  5
+#define ERROR_INVALID_FILE                      6
+#define ERROR_CORRUPT_FILE                      7
+#define ERROR_UNSUPPORTED_FILE_VERSION          8
+#define ERROR_INVALID_REGULAR_EXPRESSION        9
+#define ERROR_INVALID_HEX_STRING                10
+#define ERROR_SYNTAX_ERROR                      11
+#define ERROR_LOOP_NESTING_LIMIT_EXCEEDED       12
+#define ERROR_DUPLICATE_LOOP_IDENTIFIER         13
+#define ERROR_DUPLICATE_RULE_IDENTIFIER         14
+#define ERROR_DUPLICATE_TAG_IDENTIFIER          15
+#define ERROR_DUPLICATE_META_IDENTIFIER         16
+#define ERROR_DUPLICATE_STRING_IDENTIFIER       17
+#define ERROR_UNREFERENCED_STRING               18
+#define ERROR_UNDEFINED_STRING                  19
+#define ERROR_UNDEFINED_IDENTIFIER              20
+#define ERROR_MISPLACED_ANONYMOUS_STRING        21
 #define ERROR_INCLUDES_CIRCULAR_REFERENCE       22
-#define ERROR_INCORRECT_VARIABLE_TYPE           23
-#define ERROR_COULD_NOT_ATTACH_TO_PROCESS       24
-#define ERROR_VECTOR_TOO_LONG                   25
-#define ERROR_INCLUDE_DEPTH_EXCEEDED            26
-#define ERROR_INVALID_FILE                      27
-#define ERROR_CORRUPT_FILE                      28
-#define ERROR_UNSUPPORTED_FILE_VERSION          29
-#define ERROR_EXEC_STACK_OVERFLOW               30
-#define ERROR_SCAN_TIMEOUT                      31
-#define ERROR_LOOP_NESTING_LIMIT_EXCEEDED       32
-#define ERROR_DUPLICATE_LOOP_IDENTIFIER         33
-#define ERROR_TOO_MANY_SCAN_THREADS             34
-#define ERROR_INTERNAL_FATAL_ERROR              35
+#define ERROR_INCLUDE_DEPTH_EXCEEDED            23
+#define ERROR_INCORRECT_VARIABLE_TYPE           24
+#define ERROR_EXEC_STACK_OVERFLOW               25
+#define ERROR_SCAN_TIMEOUT                      26
+#define ERROR_TOO_MANY_SCAN_THREADS             27
+#define ERROR_CALLBACK_ERROR                    28
+#define ERROR_INVALID_ARGUMENT                  29
+#define ERROR_INTERNAL_FATAL_ERROR              30
 
 
 #define CALLBACK_MSG_RULE_MATCHING            1
@@ -169,6 +164,8 @@ typedef pthread_mutex_t mutex_t;
 #define STRING_GFLAGS_LITERAL           0x400
 #define STRING_GFLAGS_FITS_IN_ATOM      0x800
 #define STRING_GFLAGS_NULL              0x1000
+#define STRING_GFLAGS_CHAIN_PART        0x2000
+#define STRING_GFLAGS_CHAIN_TAIL        0x4000
 
 #define STRING_IS_HEX(x) \
     (((x)->g_flags) & STRING_GFLAGS_HEXADECIMAL)
@@ -203,6 +200,12 @@ typedef pthread_mutex_t mutex_t;
 #define STRING_IS_FAST_HEX_REGEXP(x) \
     (((x)->g_flags) & STRING_GFLAGS_FAST_HEX_REGEXP)
 
+#define STRING_IS_CHAIN_PART(x) \
+    (((x)->g_flags) & STRING_GFLAGS_CHAIN_PART)  // nonzero when the CHAIN_PART bit is set in g_flags
+
+#define STRING_IS_CHAIN_TAIL(x) \
+    (((x)->g_flags) & STRING_GFLAGS_CHAIN_TAIL)  // nonzero when the CHAIN_TAIL bit is set in g_flags
+
 #define STRING_IS_NULL(x) \
     ((x) == NULL || ((x)->g_flags) & STRING_GFLAGS_NULL)
 
@@ -330,6 +333,14 @@ typedef struct _YR_META
 } YR_META;
 
 
+typedef struct _YR_MATCHES  // a pair of YR_MATCH references: head and tail
+{
+  DECLARE_REFERENCE(YR_MATCH*, head);  // presumably the first node of a match list -- confirm against users
+  DECLARE_REFERENCE(YR_MATCH*, tail);  // presumably the last node of that list -- confirm against users
+
+} YR_MATCHES;
+
+
 typedef struct _YR_STRING
 {
   int32_t g_flags;
@@ -337,11 +348,10 @@ typedef struct _YR_STRING
 
   DECLARE_REFERENCE(char*, identifier);
   DECLARE_REFERENCE(uint8_t*, string);
+  DECLARE_REFERENCE(struct _YR_STRING*, chained_to);
 
-  struct {
-    DECLARE_REFERENCE(YR_MATCH*, head);
-    DECLARE_REFERENCE(YR_MATCH*, tail);
-  } matches[MAX_THREADS];
+  YR_MATCHES matches[MAX_THREADS];
+  YR_MATCHES unconfirmed_matches[MAX_THREADS];
 
 } YR_STRING;
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git



More information about the forensics-changes mailing list