ep_email_notifications/node_modules/buffertools/BoyerMoore.h
2013-01-29 17:35:40 +00:00

127 lines
2.5 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* adapted from http://en.wikipedia.org/wiki/BoyerMoore_string_search_algorithm */
#ifndef BOYER_MOORE_H
#define BOYER_MOORE_H
#include <stdint.h>
#include <limits.h>
#include <string.h>
#define ALPHABET_SIZE (1 << CHAR_BIT)
static void compute_prefix(const uint8_t* str, size_t size, int result[]) {
size_t q;
int k;
result[0] = 0;
k = 0;
for (q = 1; q < size; q++) {
while (k > 0 && str[k] != str[q])
k = result[k-1];
if (str[k] == str[q])
k++;
result[q] = k;
}
}
static void prepare_badcharacter_heuristic(const uint8_t *str, size_t size, int result[ALPHABET_SIZE]) {
size_t i;
for (i = 0; i < ALPHABET_SIZE; i++)
result[i] = -1;
for (i = 0; i < size; i++)
result[str[i]] = i;
}
void prepare_goodsuffix_heuristic(const uint8_t *normal, const size_t size, int result[]) {
const uint8_t *left = normal;
const uint8_t *right = left + size;
uint8_t * reversed = new uint8_t[size+1];
uint8_t *tmp = reversed + size;
size_t i;
/* reverse string */
*tmp = 0;
while (left < right)
*(--tmp) = *(left++);
int * prefix_normal = new int[size];
int * prefix_reversed = new int[size];
compute_prefix(normal, size, prefix_normal);
compute_prefix(reversed, size, prefix_reversed);
for (i = 0; i <= size; i++) {
result[i] = size - prefix_normal[size-1];
}
for (i = 0; i < size; i++) {
const int j = size - prefix_reversed[i];
const int k = i - prefix_reversed[i]+1;
if (result[j] > k)
result[j] = k;
}
delete[] reversed;
delete[] prefix_normal;
delete[] prefix_reversed;
}
/*
* Boyer-Moore search algorithm
*/
const uint8_t *boyermoore_search(const uint8_t *haystack, size_t haystack_len, const uint8_t *needle, size_t needle_len) {
/*
* Simple checks
*/
if(haystack_len == 0)
return NULL;
if(needle_len == 0)
return NULL;
if(needle_len > haystack_len)
return NULL;
/*
* Initialize heuristics
*/
int badcharacter[ALPHABET_SIZE];
int * goodsuffix = new int[needle_len+1];
prepare_badcharacter_heuristic(needle, needle_len, badcharacter);
prepare_goodsuffix_heuristic(needle, needle_len, goodsuffix);
/*
* Boyer-Moore search
*/
size_t s = 0;
while(s <= (haystack_len - needle_len))
{
size_t j = needle_len;
while(j > 0 && needle[j-1] == haystack[s+j-1])
j--;
if(j > 0)
{
int k = badcharacter[haystack[s+j-1]];
int m;
if(k < (int)j && (m = j-k-1) > goodsuffix[j])
s+= m;
else
s+= goodsuffix[j];
}
else
{
delete[] goodsuffix;
return haystack + s;
}
}
delete[] goodsuffix;
/* not found */
return NULL;
}
#endif /* BoyerMoore.h */