[Forensics-changes] [yara] 364/415: Fix issue #103

Hilko Bengen bengen at moszumanska.debian.org
Thu Apr 3 05:43:24 UTC 2014


This is an automated email from the git hooks/post-receive script.

bengen pushed a commit to branch debian
in repository yara.

commit b9463a5a4d9a790cb7ab160139c8ff13104fa8fc
Author: Victor Manuel Alvarez <vmalvarez at virustotal.com>
Date:   Thu Jan 16 17:46:19 2014 +0100

    Fix issue #103
---
 libyara/re_lexer.c   | 310 ++++++++++++++++++++++++---------------------------
 libyara/re_lexer.l   |  57 +++++++---
 yara-python/tests.py |   5 +
 3 files changed, 194 insertions(+), 178 deletions(-)

diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c
index e10eab7..48957f4 100644
--- a/libyara/re_lexer.c
+++ b/libyara/re_lexer.c
@@ -363,8 +363,8 @@ static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
 	*yy_cp = '\0'; \
 	yyg->yy_c_buf_p = yy_cp;
 
-#define YY_NUM_RULES 28
-#define YY_END_OF_BUFFER 29
+#define YY_NUM_RULES 27
+#define YY_END_OF_BUFFER 28
 /* This struct is not used in this scanner,
    but its presence is necessary. */
 struct yy_trans_info
@@ -372,12 +372,13 @@ struct yy_trans_info
 	flex_int32_t yy_verify;
 	flex_int32_t yy_nxt;
 	};
-static yyconst flex_int16_t yy_accept[40] =
+static yyconst flex_int16_t yy_accept[42] =
     {   0,
-        0,    0,    0,    0,   29,    7,    7,   27,    6,   15,
-        7,   26,   25,   16,    5,    3,   14,   13,   11,    9,
-       12,   10,    8,    0,    0,    0,   24,   22,   20,   18,
-       23,   21,   19,    4,    0,    1,    2,   17,    0
+        0,    0,    0,    0,   28,    7,    7,   26,    6,   15,
+        7,   25,   24,   16,    5,    3,   14,   13,   11,    9,
+       12,   10,    8,    0,    0,    0,    0,   23,   21,   19,
+       22,   20,   18,    0,    4,    0,    1,    2,   17,    0,
+        0
     } ;
 
 static yyconst flex_int32_t yy_ec[256] =
@@ -388,14 +389,14 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    1,    1,    1,    3,    1,    1,    1,    3,
         3,    3,    3,    4,    5,    3,    1,    6,    6,    6,
         6,    6,    6,    6,    6,    6,    6,    1,    1,    1,
-        1,    1,    3,    1,    1,    1,    1,    7,    1,    1,
+        1,    1,    3,    1,    7,    7,    7,    8,    7,    7,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    8,    1,    1,    1,    9,    1,    1,    1,
-       10,   11,   12,   13,    1,    1,    1,    1,    1,   14,
+        1,    1,    9,    1,    1,    1,   10,    1,    1,    1,
+       11,   12,   13,   14,    1,    1,    7,    7,    7,   15,
 
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,   15,    1,    1,    1,   16,    1,
-        1,    1,   17,    3,   18,    1,    1,    1,    1,    1,
+        7,    7,    1,    1,    1,    1,    1,    1,    1,    1,
+        1,    1,    1,    1,   16,    1,    1,    1,   17,   18,
+        1,    1,   19,    3,   20,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
@@ -412,65 +413,69 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    1,    1,    1
     } ;
 
-static yyconst flex_int32_t yy_meta[19] =
+static yyconst flex_int32_t yy_meta[21] =
     {   0,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    2,    1,    1,    1,    1,    1,    1
+        1,    2,    1,    1,    3,    4,    4,    4,    1,    1,
+        1,    1,    5,    1,    4,    1,    1,    1,    1,    1
     } ;
 
-static yyconst flex_int16_t yy_base[43] =
+static yyconst flex_int16_t yy_base[48] =
     {   0,
-        0,   16,    3,    9,   32,   74,   74,   74,   10,   28,
-        0,   26,   38,   25,   74,   13,    1,   74,   74,   74,
-       74,   74,   74,    6,   51,    0,   74,   74,   74,   74,
-       74,   74,   74,   74,   10,   74,   74,   74,   74,   69,
-       71,    0
+        0,   18,    3,    5,   43,   91,   91,   91,    9,   32,
+        0,   34,   42,   31,   91,   22,   28,   91,   91,   91,
+       91,   91,   91,    4,   49,    0,   28,   26,   23,   21,
+       20,    4,    3,    2,   91,    7,   91,   91,   91,    0,
+       91,   69,   74,   79,   84,   86,    1
     } ;
 
-static yyconst flex_int16_t yy_def[43] =
+static yyconst flex_int16_t yy_def[48] =
     {   0,
-       40,   40,   41,   41,   39,   39,   39,   39,   39,   39,
-       39,   39,   39,   39,   39,   39,   39,   39,   39,   39,
-       39,   39,   39,   39,   39,   42,   39,   39,   39,   39,
-       39,   39,   39,   39,   39,   39,   39,   39,    0,   39,
-       39,   39
+       42,   42,   43,   43,   41,   41,   41,   41,   41,   41,
+       41,   41,   44,   41,   41,   41,   41,   41,   41,   41,
+       41,   41,   41,   41,   41,   45,   41,   41,   41,   41,
+       41,   41,   41,   46,   41,   41,   41,   41,   41,   47,
+        0,   41,   41,   41,   41,   41,   41
     } ;
 
-static yyconst flex_int16_t yy_nxt[93] =
+static yyconst flex_int16_t yy_nxt[112] =
     {   0,
-       38,    7,    8,   24,   39,   25,   17,   39,   39,    9,
-       10,   35,    8,   13,   14,   35,   11,    7,    8,   13,
-       14,   15,   16,   36,   34,    9,   10,   36,    8,   26,
-       26,   39,   11,   17,   18,   19,   20,   39,   39,   39,
-       39,   21,   22,   23,   27,   28,   29,   39,   39,   30,
-       39,   31,   32,   33,   24,   39,   25,   39,   39,   39,
-       39,   39,   39,   39,   39,   39,   39,   39,   37,    6,
-        6,   12,   12,    5,   39,   39,   39,   39,   39,   39,
-       39,   39,   39,   39,   39,   39,   39,   39,   39,   39,
-       39,   39
-
+       41,    7,    8,   24,   27,   25,   26,   26,   26,   36,
+        9,   10,   36,    8,   13,   14,   13,   14,   11,    7,
+        8,   15,   16,   37,   26,   26,   37,   26,    9,   10,
+       26,    8,   26,   17,   35,   26,   11,   17,   26,   18,
+       19,   20,   41,   41,   41,   41,   21,   22,   23,   28,
+       29,   30,   24,   41,   25,   41,   31,   32,   33,   34,
+       41,   41,   41,   41,   41,   41,   41,   41,   38,    6,
+        6,    6,    6,    6,   12,   12,   12,   12,   12,   27,
+       41,   27,   27,   27,   39,   39,   39,   39,   40,   40,
+        5,   41,   41,   41,   41,   41,   41,   41,   41,   41,
+
+       41,   41,   41,   41,   41,   41,   41,   41,   41,   41,
+       41
     } ;
 
-static yyconst flex_int16_t yy_chk[93] =
+static yyconst flex_int16_t yy_chk[112] =
     {   0,
-       42,    1,    1,   11,    0,   11,   17,    0,    0,    1,
-        1,   24,    1,    3,    3,   35,    1,    2,    2,    4,
-        4,    9,    9,   24,   16,    2,    2,   35,    2,   14,
-       12,    5,    2,   10,   10,   10,   10,    0,    0,    0,
-        0,   10,   10,   10,   13,   13,   13,    0,    0,   13,
-        0,   13,   13,   13,   25,    0,   25,    0,    0,    0,
-        0,    0,    0,    0,    0,    0,    0,    0,   25,   40,
-       40,   41,   41,   39,   39,   39,   39,   39,   39,   39,
-       39,   39,   39,   39,   39,   39,   39,   39,   39,   39,
-       39,   39
-
+        0,    1,    1,   11,   47,   11,   34,   33,   32,   24,
+        1,    1,   36,    1,    3,    3,    4,    4,    1,    2,
+        2,    9,    9,   24,   31,   30,   36,   29,    2,    2,
+       28,    2,   27,   17,   16,   14,    2,   10,   12,   10,
+       10,   10,    5,    0,    0,    0,   10,   10,   10,   13,
+       13,   13,   25,    0,   25,    0,   13,   13,   13,   13,
+        0,    0,    0,    0,    0,    0,    0,    0,   25,   42,
+       42,   42,   42,   42,   43,   43,   43,   43,   43,   44,
+        0,   44,   44,   44,   45,   45,   45,   45,   46,   46,
+       41,   41,   41,   41,   41,   41,   41,   41,   41,   41,
+
+       41,   41,   41,   41,   41,   41,   41,   41,   41,   41,
+       41
     } ;
 
 /* Table of booleans, true if rule could match eol. */
-static yyconst flex_int32_t yy_rule_can_match_eol[29] =
+static yyconst flex_int32_t yy_rule_can_match_eol[28] =
     {   0,
 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 
-    0, 0, 0, 0, 0, 0, 1, 0, 0,     };
+    0, 0, 0, 0, 0, 1, 0, 0,     };
 
 /* The intent behind this definition is that it'll catch
  * any uses of REJECT which flex missed.
@@ -498,6 +503,8 @@ limitations under the License.
 /* Lexical analyzer for regular expressions */
 #line 20 "re_lexer.l"
 
+#include <assert.h>
+
 #include "yara.h"
 #include "atoms.h"
 #include "mem.h"
@@ -518,11 +525,12 @@ limitations under the License.
 #endif
 
 
+uint8_t escaped_char_value(char* text);
 uint8_t read_escaped_char(yyscan_t yyscanner);
 
 #define YY_NO_UNISTD_H 1
 
-#line 526 "re_lexer.c"
+#line 534 "re_lexer.c"
 
 #define INITIAL 0
 #define char_class 1
@@ -630,8 +638,6 @@ extern int re_yywrap (yyscan_t yyscanner );
 #endif
 #endif
 
-    static void yyunput (int c,char *buf_ptr  ,yyscan_t yyscanner);
-    
 #ifndef yytext_ptr
 static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
 #endif
@@ -756,10 +762,10 @@ YY_DECL
 	register int yy_act;
     struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
 
-#line 61 "re_lexer.l"
+#line 65 "re_lexer.l"
 
 
-#line 763 "re_lexer.c"
+#line 769 "re_lexer.c"
 
     yylval = yylval_param;
 
@@ -814,13 +820,13 @@ yy_match:
 			while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 				{
 				yy_current_state = (int) yy_def[yy_current_state];
-				if ( yy_current_state >= 40 )
+				if ( yy_current_state >= 42 )
 					yy_c = yy_meta[(unsigned int) yy_c];
 				}
 			yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
 			++yy_cp;
 			}
-		while ( yy_base[yy_current_state] != 74 );
+		while ( yy_base[yy_current_state] != 91 );
 
 yy_find_action:
 		yy_act = yy_accept[yy_current_state];
@@ -858,7 +864,7 @@ do_action:	/* This label is used only to access EOF actions. */
 
 case 1:
 YY_RULE_SETUP
-#line 63 "re_lexer.l"
+#line 67 "re_lexer.l"
 {
 
   // Examples: {3,8} {0,5} {,5} {7,}
@@ -894,7 +900,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 2:
 YY_RULE_SETUP
-#line 97 "re_lexer.l"
+#line 101 "re_lexer.l"
 {
 
   // Example: {10}
@@ -914,7 +920,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 3:
 YY_RULE_SETUP
-#line 115 "re_lexer.l"
+#line 119 "re_lexer.l"
 {
 
   // Start of a negated character class. Example: [^abcd]
@@ -926,7 +932,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 4:
 YY_RULE_SETUP
-#line 124 "re_lexer.l"
+#line 128 "re_lexer.l"
 {
 
   // Start of character negated class containing a ].
@@ -941,7 +947,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 5:
 YY_RULE_SETUP
-#line 137 "re_lexer.l"
+#line 141 "re_lexer.l"
 {
 
   // Start of character class containing a ].
@@ -956,7 +962,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 6:
 YY_RULE_SETUP
-#line 150 "re_lexer.l"
+#line 154 "re_lexer.l"
 {
 
   // Start of character class. Example: [abcd]
@@ -969,7 +975,7 @@ YY_RULE_SETUP
 case 7:
 /* rule 7 can match eol */
 YY_RULE_SETUP
-#line 160 "re_lexer.l"
+#line 164 "re_lexer.l"
 {
 
   // Any non-special character is passed as a CHAR token to the scanner.
@@ -980,49 +986,49 @@ YY_RULE_SETUP
 	YY_BREAK
 case 8:
 YY_RULE_SETUP
-#line 169 "re_lexer.l"
+#line 173 "re_lexer.l"
 {
   return _WORD_CHAR_;
 }
 	YY_BREAK
 case 9:
 YY_RULE_SETUP
-#line 174 "re_lexer.l"
+#line 178 "re_lexer.l"
 {
   return _NON_WORD_CHAR_;
 }
 	YY_BREAK
 case 10:
 YY_RULE_SETUP
-#line 179 "re_lexer.l"
+#line 183 "re_lexer.l"
 {
   return _SPACE_;
 }
 	YY_BREAK
 case 11:
 YY_RULE_SETUP
-#line 184 "re_lexer.l"
+#line 188 "re_lexer.l"
 {
   return _NON_SPACE_;
 }
 	YY_BREAK
 case 12:
 YY_RULE_SETUP
-#line 189 "re_lexer.l"
+#line 193 "re_lexer.l"
 {
   return _DIGIT_;
 }
 	YY_BREAK
 case 13:
 YY_RULE_SETUP
-#line 194 "re_lexer.l"
+#line 198 "re_lexer.l"
 {
   return _NON_DIGIT_;
 }
 	YY_BREAK
 case 14:
 YY_RULE_SETUP
-#line 199 "re_lexer.l"
+#line 203 "re_lexer.l"
 {
 
   yyerror(yyscanner, lex_env, "backreferences are not allowed");
@@ -1031,7 +1037,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 15:
 YY_RULE_SETUP
-#line 206 "re_lexer.l"
+#line 210 "re_lexer.l"
 {
   yylval->integer = read_escaped_char(yyscanner);
   return _CHAR_;
@@ -1039,7 +1045,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 16:
 YY_RULE_SETUP
-#line 212 "re_lexer.l"
+#line 216 "re_lexer.l"
 {
 
   // End of character class.
@@ -1062,7 +1068,7 @@ YY_RULE_SETUP
 case 17:
 /* rule 17 can match eol */
 YY_RULE_SETUP
-#line 232 "re_lexer.l"
+#line 237 "re_lexer.l"
 {
 
   // A range inside a character class.
@@ -1074,6 +1080,16 @@ YY_RULE_SETUP
   uint8_t start = yytext[0];
   uint8_t end = yytext[2];
 
+  if (start == '\\')
+  {
+    start = escaped_char_value(yytext);
+
+    if (yytext[1] == 'x')
+      end = yytext[5];
+    else
+      end = yytext[3];
+  }
+
   if (end == '\\')
     end = read_escaped_char(yyscanner);
 
@@ -1091,15 +1107,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 18:
 YY_RULE_SETUP
-#line 259 "re_lexer.l"
-{
-
-  LEX_ENV->class_vector[']' / 8] |= 1 << ']' % 8;
-}
-	YY_BREAK
-case 19:
-YY_RULE_SETUP
-#line 265 "re_lexer.l"
+#line 274 "re_lexer.l"
 {
 
   int i;
@@ -1112,9 +1120,9 @@ YY_RULE_SETUP
     LEX_ENV->class_vector[i] |= word_chars[i];
 }
 	YY_BREAK
-case 20:
+case 19:
 YY_RULE_SETUP
-#line 278 "re_lexer.l"
+#line 287 "re_lexer.l"
 {
 
   int i;
@@ -1127,18 +1135,18 @@ YY_RULE_SETUP
     LEX_ENV->class_vector[i] |= ~word_chars[i];
 }
 	YY_BREAK
-case 21:
+case 20:
 YY_RULE_SETUP
-#line 291 "re_lexer.l"
+#line 300 "re_lexer.l"
 {
 
   LEX_ENV->class_vector[' ' / 8] |= 1 << ' ' % 8;
   LEX_ENV->class_vector['\t' / 8] |= 1 << '\t' % 8;
 }
 	YY_BREAK
-case 22:
+case 21:
 YY_RULE_SETUP
-#line 298 "re_lexer.l"
+#line 307 "re_lexer.l"
 {
 
   int i;
@@ -1150,9 +1158,9 @@ YY_RULE_SETUP
   LEX_ENV->class_vector['\t' / 8] &= ~(1 << '\t' % 8);
 }
 	YY_BREAK
-case 23:
+case 22:
 YY_RULE_SETUP
-#line 310 "re_lexer.l"
+#line 319 "re_lexer.l"
 {
 
   char c;
@@ -1161,9 +1169,9 @@ YY_RULE_SETUP
     LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
 }
 	YY_BREAK
-case 24:
+case 23:
 YY_RULE_SETUP
-#line 319 "re_lexer.l"
+#line 328 "re_lexer.l"
 {
 
   int i;
@@ -1176,19 +1184,19 @@ YY_RULE_SETUP
     LEX_ENV->class_vector[c / 8] &= ~(1 << c % 8);
 }
 	YY_BREAK
-case 25:
+case 24:
 YY_RULE_SETUP
-#line 332 "re_lexer.l"
+#line 341 "re_lexer.l"
 {
 
   uint8_t c = read_escaped_char(yyscanner);
-  unput(c);
+  LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
 }
 	YY_BREAK
-case 26:
-/* rule 26 can match eol */
+case 25:
+/* rule 25 can match eol */
 YY_RULE_SETUP
-#line 339 "re_lexer.l"
+#line 348 "re_lexer.l"
 {
 
   // A character class (i.e: [0-9a-f]) is represented by a 256-bits vector,
@@ -1198,7 +1206,7 @@ YY_RULE_SETUP
 }
 	YY_BREAK
 case YY_STATE_EOF(char_class):
-#line 348 "re_lexer.l"
+#line 357 "re_lexer.l"
 {
 
   // End of regexp reached while scanning a character class.
@@ -1207,9 +1215,9 @@ case YY_STATE_EOF(char_class):
   yyterminate();
 }
 	YY_BREAK
-case 27:
+case 26:
 YY_RULE_SETUP
-#line 357 "re_lexer.l"
+#line 366 "re_lexer.l"
 {
 
   if (yytext[0] >= 32 && yytext[0] < 127)
@@ -1224,18 +1232,18 @@ YY_RULE_SETUP
 }
 	YY_BREAK
 case YY_STATE_EOF(INITIAL):
-#line 371 "re_lexer.l"
+#line 380 "re_lexer.l"
 {
 
   yyterminate();
 }
 	YY_BREAK
-case 28:
+case 27:
 YY_RULE_SETUP
-#line 376 "re_lexer.l"
+#line 385 "re_lexer.l"
 ECHO;
 	YY_BREAK
-#line 1239 "re_lexer.c"
+#line 1247 "re_lexer.c"
 
 	case YY_END_OF_BUFFER:
 		{
@@ -1527,7 +1535,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
 		while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 			{
 			yy_current_state = (int) yy_def[yy_current_state];
-			if ( yy_current_state >= 40 )
+			if ( yy_current_state >= 42 )
 				yy_c = yy_meta[(unsigned int) yy_c];
 			}
 		yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -1556,57 +1564,15 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
 	while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 		{
 		yy_current_state = (int) yy_def[yy_current_state];
-		if ( yy_current_state >= 40 )
+		if ( yy_current_state >= 42 )
 			yy_c = yy_meta[(unsigned int) yy_c];
 		}
 	yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
-	yy_is_jam = (yy_current_state == 39);
+	yy_is_jam = (yy_current_state == 41);
 
 	return yy_is_jam ? 0 : yy_current_state;
 }
 
-    static void yyunput (int c, register char * yy_bp , yyscan_t yyscanner)
-{
-	register char *yy_cp;
-    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
-    yy_cp = yyg->yy_c_buf_p;
-
-	/* undo effects of setting up yytext */
-	*yy_cp = yyg->yy_hold_char;
-
-	if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
-		{ /* need to shift things up to make room */
-		/* +2 for EOB chars. */
-		register yy_size_t number_to_move = yyg->yy_n_chars + 2;
-		register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[
-					YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
-		register char *source =
-				&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move];
-
-		while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
-			*--dest = *--source;
-
-		yy_cp += (int) (dest - source);
-		yy_bp += (int) (dest - source);
-		YY_CURRENT_BUFFER_LVALUE->yy_n_chars =
-			yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_buf_size;
-
-		if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
-			YY_FATAL_ERROR( "flex scanner push-back overflow" );
-		}
-
-	*--yy_cp = (char) c;
-
-    if ( c == '\n' ){
-        --yylineno;
-    }
-
-	yyg->yytext_ptr = yy_bp;
-	yyg->yy_hold_char = *yy_cp;
-	yyg->yy_c_buf_p = yy_cp;
-}
-
 #ifndef YY_NO_INPUT
 #ifdef __cplusplus
     static int yyinput (yyscan_t yyscanner)
@@ -2410,21 +2376,22 @@ void re_yyfree (void * ptr , yyscan_t yyscanner)
 
 #define YYTABLES_NAME "yytables"
 
-#line 376 "re_lexer.l"
+#line 385 "re_lexer.l"
 
 
 
-uint8_t read_escaped_char(yyscan_t yyscanner)
+uint8_t escaped_char_value(char* text)
 {
-  int result;
   char hex[3];
-  int c = input(yyscanner);
+  int result;
+
+  assert(text[0] == '\\');
 
-  switch(c)
+  switch(text[1])
   {
   case 'x':
-    hex[0] = input(yyscanner);
-    hex[1] = input(yyscanner);
+    hex[0] = text[2];
+    hex[1] = text[3];
     hex[2] = '\0';
     sscanf(hex, "%x", &result);
     break;
@@ -2450,13 +2417,30 @@ uint8_t read_escaped_char(yyscan_t yyscanner)
     break;
 
   default:
-    result = c;
+    result = text[1];
   }
 
   return result;
 }
 
 
+uint8_t read_escaped_char(yyscan_t yyscanner)
+{
+  char text[4];
+
+  text[0] = '\\';
+  text[1] = input(yyscanner);
+
+  if (text[1] == 'x')
+  {
+    text[2] = input(yyscanner);
+    text[3] = input(yyscanner);
+  }
+
+  return escaped_char_value(text);
+}
+
+
 
 #ifdef WIN32
 extern DWORD recovery_state_key;
diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l
index bc9c23e..6b063a0 100644
--- a/libyara/re_lexer.l
+++ b/libyara/re_lexer.l
@@ -18,6 +18,8 @@ limitations under the License.
 
 %{
 
+#include <assert.h>
+
 #include "yara.h"
 #include "atoms.h"
 #include "mem.h"
@@ -38,6 +40,7 @@ limitations under the License.
 #endif
 
 
+uint8_t escaped_char_value(char* text);
 uint8_t read_escaped_char(yyscan_t yyscanner);
 
 %}
@@ -45,6 +48,7 @@ uint8_t read_escaped_char(yyscan_t yyscanner);
 %option reentrant bison-bridge
 %option noyywrap
 %option nounistd
+%option nounput
 %option yylineno
 %option prefix="re_yy"
 
@@ -229,7 +233,8 @@ hex_digit     [0-9a-fA-F]
 }
 
 
-<char_class>[^\\]\-[^]] {
+
+<char_class>(\\x{hex_digit}{2}|\\.|[^\\])\-[^]] {
 
   // A range inside a character class.
   //  [abc0-9]
@@ -240,6 +245,16 @@ hex_digit     [0-9a-fA-F]
   uint8_t start = yytext[0];
   uint8_t end = yytext[2];
 
+  if (start == '\\')
+  {
+    start = escaped_char_value(yytext);
+
+    if (yytext[1] == 'x')
+      end = yytext[5];
+    else
+      end = yytext[3];
+  }
+
   if (end == '\\')
     end = read_escaped_char(yyscanner);
 
@@ -256,12 +271,6 @@ hex_digit     [0-9a-fA-F]
 }
 
 
-<char_class>\\] {
-
-  LEX_ENV->class_vector[']' / 8] |= 1 << ']' % 8;
-}
-
-
 <char_class>\\w {
 
   int i;
@@ -332,7 +341,7 @@ hex_digit     [0-9a-fA-F]
 <char_class>\\ {
 
   uint8_t c = read_escaped_char(yyscanner);
-  unput(c);
+  LEX_ENV->class_vector[c / 8] |= 1 << c % 8;
 }
 
 
@@ -375,17 +384,18 @@ hex_digit     [0-9a-fA-F]
 
 %%
 
-uint8_t read_escaped_char(yyscan_t yyscanner)
+uint8_t escaped_char_value(char* text)
 {
-  int result;
   char hex[3];
-  int c = input(yyscanner);
+  int result;
+
+  assert(text[0] == '\\');
 
-  switch(c)
+  switch(text[1])
   {
   case 'x':
-    hex[0] = input(yyscanner);
-    hex[1] = input(yyscanner);
+    hex[0] = text[2];
+    hex[1] = text[3];
     hex[2] = '\0';
     sscanf(hex, "%x", &result);
     break;
@@ -411,13 +421,30 @@ uint8_t read_escaped_char(yyscan_t yyscanner)
     break;
 
   default:
-    result = c;
+    result = text[1];
   }
 
   return result;
 }
 
 
+uint8_t read_escaped_char(yyscan_t yyscanner)
+{
+  char text[4];
+
+  text[0] = '\\';
+  text[1] = input(yyscanner);
+
+  if (text[1] == 'x')
+  {
+    text[2] = input(yyscanner);
+    text[3] = input(yyscanner);
+  }
+
+  return escaped_char_value(text);
+}
+
+
 
 #ifdef WIN32
 extern DWORD recovery_state_key;
diff --git a/yara-python/tests.py b/yara-python/tests.py
index 1be4cf9..f70b47a 100644
--- a/yara-python/tests.py
+++ b/yara-python/tests.py
@@ -147,6 +147,11 @@ RE_TESTS = [
   (r'\x00\x01\x02', '\x00\x01\x02', SUCCEED, '\x00\x01\x02'),
   (r'[\x00-\x02]+', '\x00\x01\x02', SUCCEED, '\x00\x01\x02'),
   (r'[\x00-\x02]+', '\x03\x04\x05', FAIL),
+  (r'[\x5D]', ']', SUCCEED, ']'),
+  (r'[\0x5A-\x5D]', '\x5B', SUCCEED, '\x5B'),
+  (r'[\x5D-\x5F]', '\x5E', SUCCEED, '\x5E'),
+  (r'[\x5C-\x5F]', '\x5E', SUCCEED, '\x5E'),
+  (r'[\x5D-\x5F]', '\x5E', SUCCEED, '\x5E'),
   ('a\wc', 'abc', SUCCEED, 'abc'),
   ('a\wc', 'a_c', SUCCEED, 'a_c'),
   ('a\wc', 'a0c', SUCCEED, 'a0c'),

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git



More information about the forensics-changes mailing list