[Forensics-changes] [yara] 57/415: Added one-byte hashing to improve scanning speed
Hilko Bengen
bengen at moszumanska.debian.org
Thu Apr 3 05:42:44 UTC 2014
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to branch debian
in repository yara.
commit 58fc33eecb737bbfda4caada96c96692ec88984f
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Thu Jan 14 10:53:58 2010 +0000
Added one-byte hashing to improve scanning speed
---
libyara/libyara.c | 18 +++++-
libyara/scan.c | 172 +++++++++++++++++++++++++++++++++++++-----------------
libyara/yara.h | 3 +-
3 files changed, 136 insertions(+), 57 deletions(-)
diff --git a/libyara/libyara.c b/libyara/libyara.c
index dbf35fc..9b7b2ff 100644
--- a/libyara/libyara.c
+++ b/libyara/libyara.c
@@ -55,7 +55,8 @@ YARA_CONTEXT* yr_create_context()
context->allow_includes = TRUE;
context->current_namespace = yr_create_namespace(context, "default");
- memset(context->hash_table.hashed_strings, 0, sizeof(context->hash_table.hashed_strings));
+ memset(context->hash_table.hashed_strings_2b, 0, sizeof(context->hash_table.hashed_strings_2b));
+ memset(context->hash_table.hashed_strings_1b, 0, sizeof(context->hash_table.hashed_strings_1b));
return context;
@@ -602,10 +603,10 @@ int yr_calculate_rules_weight(YARA_CONTEXT* context)
}
for (i = 0; i < 256; i++)
- {
+ {
for (j = 0; j < 256; j++)
{
- entry = context->hash_table.hashed_strings[i][j];
+ entry = context->hash_table.hashed_strings_2b[i][j];
count = 0;
@@ -618,6 +619,17 @@ int yr_calculate_rules_weight(YARA_CONTEXT* context)
weight += count;
}
+
+ entry = context->hash_table.hashed_strings_1b[i];
+
+ count = 0;
+
+ while (entry != NULL)
+ {
+ weight += string_weight(entry->string, 2);
+ entry = entry->next;
+ count++;
+ }
}
entry = context->hash_table.non_hashed_strings;
diff --git a/libyara/scan.c b/libyara/scan.c
index 64edf0b..6fa6cfb 100644
--- a/libyara/scan.c
+++ b/libyara/scan.c
@@ -370,7 +370,8 @@ int populate_hash_table(HASH_TABLE* hash_table, RULE_LIST* rule_list)
STRING* string;
STRING_LIST_ENTRY* entry;
unsigned char x,y;
- char hashable;
+ char hashable_2b;
+ char hashable_1b;
int i, next;
for (i = 0; i < 256; i++)
@@ -410,93 +411,136 @@ int populate_hash_table(HASH_TABLE* hash_table, RULE_LIST* rule_list)
y = string->string[1];
}
- hashable = isalphanum[x] && isalphanum[y];
+ hashable_2b = isalphanum[x] && isalphanum[y];
+ hashable_1b = isalphanum[x];
}
else
{
x = string->string[0];
y = string->string[1];
- hashable = TRUE;
+ hashable_2b = TRUE;
+ hashable_1b = TRUE;
} /* if (string->flags & STRING_FLAGS_REGEXP) */
if (string->flags & STRING_FLAGS_HEXADECIMAL)
{
- hashable = (string->mask[0] == 0xFF) && (string->mask[1] == 0xFF);
+ hashable_2b = (string->mask[0] == 0xFF) && (string->mask[1] == 0xFF);
+ hashable_1b = (string->mask[0] == 0xFF);
}
- if (hashable && string->flags & STRING_FLAGS_NO_CASE)
+ if (hashable_1b && string->flags & STRING_FLAGS_NO_CASE)
{
- /*
- if string is case-insensitive add an entry in the hash table
- for each posible combination
- */
+ if (hashable_2b)
+ {
+ /*
+ if string is case-insensitive add an entry in the hash table
+ for each posible combination
+ */
- x = lowercase[x];
- y = lowercase[y];
+ x = lowercase[x];
+ y = lowercase[y];
- /* both lowercases */
+ /* both lowercases */
- entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
+ entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
- if (entry == NULL)
- return ERROR_INSUFICIENT_MEMORY;
+ if (entry == NULL)
+ return ERROR_INSUFICIENT_MEMORY;
- entry->next = hash_table->hashed_strings[x][y];
- entry->string = string;
- hash_table->hashed_strings[x][y] = entry;
+ entry->next = hash_table->hashed_strings_2b[x][y];
+ entry->string = string;
+ hash_table->hashed_strings_2b[x][y] = entry;
- /* X uppercase Y lowercase */
+ /* X uppercase Y lowercase */
- x = toupper(x);
+ x = toupper(x);
- entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
+ entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
- if (entry == NULL)
- return ERROR_INSUFICIENT_MEMORY;
+ if (entry == NULL)
+ return ERROR_INSUFICIENT_MEMORY;
- entry->next = hash_table->hashed_strings[x][y];
- entry->string = string;
- hash_table->hashed_strings[x][y] = entry;
+ entry->next = hash_table->hashed_strings_2b[x][y];
+ entry->string = string;
+ hash_table->hashed_strings_2b[x][y] = entry;
- /* both uppercases */
+ /* both uppercases */
- y = toupper(y);
+ y = toupper(y);
- entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
+ entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
- if (entry == NULL)
- return ERROR_INSUFICIENT_MEMORY;
+ if (entry == NULL)
+ return ERROR_INSUFICIENT_MEMORY;
- entry->next = hash_table->hashed_strings[x][y];
- entry->string = string;
- hash_table->hashed_strings[x][y] = entry;
+ entry->next = hash_table->hashed_strings_2b[x][y];
+ entry->string = string;
+ hash_table->hashed_strings_2b[x][y] = entry;
- /* X lowercase Y uppercase */
+ /* X lowercase Y uppercase */
- x = lowercase[x];
+ x = lowercase[x];
- entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
+ entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
- if (entry == NULL)
- return ERROR_INSUFICIENT_MEMORY;
+ if (entry == NULL)
+ return ERROR_INSUFICIENT_MEMORY;
- entry->next = hash_table->hashed_strings[x][y];
- entry->string = string;
- hash_table->hashed_strings[x][y] = entry;
+ entry->next = hash_table->hashed_strings_2b[x][y];
+ entry->string = string;
+ hash_table->hashed_strings_2b[x][y] = entry;
+ }
+ else
+ {
+ /* lowercase */
+
+ x = lowercase[x];
+
+ entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
+
+ if (entry == NULL)
+ return ERROR_INSUFICIENT_MEMORY;
+
+ entry->next = hash_table->hashed_strings_1b[x];
+ entry->string = string;
+ hash_table->hashed_strings_1b[x] = entry;
+
+ /* uppercase */
+
+ x = toupper(x);
+
+ entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
+
+ if (entry == NULL)
+ return ERROR_INSUFICIENT_MEMORY;
+
+ entry->next = hash_table->hashed_strings_1b[x];
+ entry->string = string;
+ hash_table->hashed_strings_1b[x] = entry;
+ }
}
- else if (hashable)
+ else if (hashable_1b)
{
- entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
-
+ entry = (STRING_LIST_ENTRY*) yr_malloc(sizeof(STRING_LIST_ENTRY));
+
if (entry == NULL)
return ERROR_INSUFICIENT_MEMORY;
-
- entry->next = hash_table->hashed_strings[x][y];
- entry->string = string;
- hash_table->hashed_strings[x][y] = entry;
+
+ entry->string = string;
+
+ if (hashable_2b)
+ {
+ entry->next = hash_table->hashed_strings_2b[x][y];
+ hash_table->hashed_strings_2b[x][y] = entry;
+ }
+ else
+ {
+ entry->next = hash_table->hashed_strings_1b[x];
+ hash_table->hashed_strings_1b[x] = entry;
+ }
}
else /* non hashable */
{
@@ -531,9 +575,20 @@ void clear_hash_table(HASH_TABLE* hash_table)
for (i = 0; i < 256; i++)
{
+ entry = hash_table->hashed_strings_1b[i];
+
+ while (entry != NULL)
+ {
+ next_entry = entry->next;
+ yr_free(entry);
+ entry = next_entry;
+ }
+
+ hash_table->hashed_strings_1b[i] = NULL;
+
for (j = 0; j < 256; j++)
{
- entry = hash_table->hashed_strings[i][j];
+ entry = hash_table->hashed_strings_2b[i][j];
while (entry != NULL)
{
@@ -542,7 +597,7 @@ void clear_hash_table(HASH_TABLE* hash_table)
entry = next_entry;
}
- hash_table->hashed_strings[i][j] = NULL;
+ hash_table->hashed_strings_2b[i][j] = NULL;
}
}
@@ -773,9 +828,20 @@ int find_matches( unsigned char first_char,
int result = ERROR_SUCCESS;
- if (context->hash_table.hashed_strings[first_char][second_char] != NULL)
+ if (context->hash_table.hashed_strings_2b[first_char][second_char] != NULL)
+ {
+ result = find_matches_for_strings( context->hash_table.hashed_strings_2b[first_char][second_char],
+ buffer,
+ buffer_size,
+ current_file_offset,
+ flags,
+ negative_size);
+ }
+
+
+ if (result == ERROR_SUCCESS && context->hash_table.hashed_strings_1b[first_char] != NULL)
{
- result = find_matches_for_strings( context->hash_table.hashed_strings[first_char][second_char],
+ result = find_matches_for_strings( context->hash_table.hashed_strings_1b[first_char],
buffer,
buffer_size,
current_file_offset,
diff --git a/libyara/yara.h b/libyara/yara.h
index 9312728..15f7958 100644
--- a/libyara/yara.h
+++ b/libyara/yara.h
@@ -230,7 +230,8 @@ typedef struct _RULE_LIST
typedef struct _HASH_TABLE
{
- STRING_LIST_ENTRY* hashed_strings[256][256];
+ STRING_LIST_ENTRY* hashed_strings_2b[256][256];
+ STRING_LIST_ENTRY* hashed_strings_1b[256];
STRING_LIST_ENTRY* non_hashed_strings;
int populated;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes mailing list