[Forensics-changes] [yara] 131/415: Improve hashing of regexes.
Hilko Bengen
bengen at moszumanska.debian.org
Thu Apr 3 05:42:54 UTC 2014
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to branch debian
in repository yara.
commit 5751c35a52db03230a921810446dc00d7af85505
Author: Shane Huntley <shuntley at google.com>
Date: Fri May 4 22:17:40 2012 +0000
Improve hashing of regexes.
---
libyara/scan.c | 119 ++++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 88 insertions(+), 31 deletions(-)
diff --git a/libyara/scan.c b/libyara/scan.c
index 56eb578..9bf81a4 100644
--- a/libyara/scan.c
+++ b/libyara/scan.c
@@ -42,6 +42,9 @@ GNU General Public License for more details.
static char lowercase[256];
static char altercase[256];
static char isalphanum[256];
+static char isregexescapable[256];
+static char isregexhashable[256];
+
/* Function implementations */
@@ -353,12 +356,36 @@ int populate_hash_table(HASH_TABLE* hash_table, RULE_LIST* rule_list)
{
lowercase[i] = tolower(i);
isalphanum[i] = isalnum(i);
-
+ isregexhashable[i] = isalnum(i);
+
if (lowercase[i] == i)
altercase[i] = toupper(i);
else
altercase[i] = lowercase[i];
}
+
+ // Add other characters that we can hash with for regexes.
+ isregexhashable['%'] = 1;
+ isregexhashable['"'] = 1;
+ isregexhashable[','] = 1;
+ isregexhashable['\''] = 1;
+ isregexhashable[':'] = 1;
+ isregexhashable['/'] = 1;
+
+ // Characters that are escaped in regexes.
+ isregexescapable['['] = 1;
+ isregexescapable['{'] = 1;
+ isregexescapable['.'] = 1;
+ isregexescapable['('] = 1;
+ isregexescapable[')'] = 1;
+ isregexescapable['.'] = 1;
+ isregexescapable['?'] = 1;
+ isregexescapable['^'] = 1;
+ isregexescapable['*'] = 1;
+ isregexescapable['+'] = 1;
+ isregexescapable['$'] = 1;
+ isregexescapable['|'] = 1;
+ isregexescapable['\\'] = 1;
rule = rule_list->head;
@@ -367,42 +394,76 @@ int populate_hash_table(HASH_TABLE* hash_table, RULE_LIST* rule_list)
string = rule->string_list_head;
while (string != NULL)
- {
+ {
fcount = 0;
scount = 0;
-
- if (string->flags & STRING_FLAGS_REGEXP)
- {
+ f = 0;
+ s = 0;
+
+
+ if (string->flags & STRING_FLAGS_REGEXP)
+ {
+ int pos = 0;
+
if (string->string[0] == '^')
- {
- if (string->length > 2)
- {
- f = string->string[1];
- s = string->string[2];
- }
- else
- {
- f = string->string[1];
- s = 0;
- }
- }
- else
- {
- f = string->string[0];
- s = string->string[1];
- }
-
- if (isalphanum[f])
+ {
+ pos++;
+ }
+
+ if (string->length > pos)
+ {
+ // Get first character for hash map.
+ if (string->string[pos] == '\\' && string->length > pos + 1)
+ {
+ if (isregexescapable[string->string[pos+1]])
+ {
+ f = string->string[pos+1];
+ pos += 2;
+ }
+ }
+ else
+ {
+ if (isregexhashable[string->string[pos]])
+ {
+ f = string->string[pos];
+ pos++;
+ }
+ }
+ }
+
+ if (f && string->length > pos)
+ {
+ // Get second character for hash map.
+ if (string->string[pos] == '\\' && string->length > pos + 1)
+ {
+ if (isregexescapable[string->string[pos+1]])
+ {
+ s = string->string[pos+1];
+ pos += 2;
+ }
+ }
+ else
+ {
+ if (isregexhashable[string->string[pos]])
+ {
+ s = string->string[pos];
+ pos++;
+ }
+ }
+ }
+ // If f is set then it can be used in hashtable
+
+ if (f)
{
first[fcount++] = f;
if (string->flags & STRING_FLAGS_NO_CASE)
first[fcount++] = altercase[f];
- if (isalphanum[s])
+ if (s)
{
- second[scount++] = s;
-
+ second[scount++] = s;
+
if (string->flags & STRING_FLAGS_NO_CASE)
second[scount++] = altercase[s];
}
@@ -821,7 +882,3 @@ int find_matches( unsigned char first_char,
return result;
}
-
-
-
-
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes mailing list