/*
* Simple String Find Optimisation
*
* Saying: Understanding!
*/
#include
<stdio.h>
#include
<string.h>
/**
* A basic plateform independent C search sub string algorithm
* referenced from the linux 2.2.7 source code for
* demonstration purposes here
*
* my_strstr(..) and not strstr(..)
* Well I called it my_strstr(..) so that if you are using
#includes(..)
* and its already defined ...might give errors...so this way we
know
* its ours :) As strstr(..) is also defined in string.h
*/
char
* my_strstr(const
char * s1,const
char * s2)
{
int l1, l2;
l2
= (int)strlen(s2);
if (!l2)
return (char
*) s1;
l1
= (int)strlen(s1);
while (l1 >= l2) {
l1--;
if (!memcmp(s1,s2,l2))
return (char
*) s1;
s1++;
}
return NULL;
}//
End strstr(..)
/**
* Our new set of functions for searching for our sub string.
*/
static
void quick_strstr_init(const
char *str, unsigned
char * skips )
{
unsigned char
search_len = strlen(str);
search_len = (unsigned
char)strlen(str);
int search_len2 = search_len + 1;
// The char past the end of the string
// For chars no in searchstr.
for(int i=0;
i<256; i++)
skips[i] = search_len2; //
search_len + 1
// We
didn't use (search_len+1) here
// in the
loop, as where talking optimisation
// here,
and we keep as much as we can out
// of the
loop
// For chars in serachstr, with double chars only
// right most survives
for( i=0; i<search_len ; i++)
skips[str[i]] = search_len - i; //
pos back to front
// We have
a count down!...e.g
// our string = "cat"
// skip['c']
= 3
// skip['a']
= 2
//
skip['t'] = 1
// If we have multiple occurences of a letter, we
use the smallest
// skip value. Don't want to mess up or
optimisation with multiple
// char offsets
}//End
init
char
* quick_strstr_search(const
char *textf, char
*searchstr, unsigned
char * skips)
{
int search_len = (int)strlen(searchstr);
// Len of our sub string
int len = (int)strlen(textf);
// Length of text file
// where searching
unsigned char
* end = (unsigned
char*)textf + len; // Pointer to the end of
// the text file.
int len_searchstr = search_len -
1; // Sub search string minus
// 1, which points to the
// last char, and not the '\0'
int
j; //
Variable used in our loop
// for(;;) generates slightly more otimisation
than while(true)
for(;;)
{
// main loop for comparing string
j=len_searchstr;
// len of search string
while( textf[j] == searchstr[j]
&& --j >= 0); // MAIN Search Loop
// At this point we either have or
have-not found our string after
// searching the length of the sub
string, so how far along do we move
// in our main string where searching
// j==-1 if we have
// j!=-1 if we've not
if( j!= -1 )
{
// Mismatch; align and check for
end of textf
unsigned
char * skipindex = (unsigned
char*)textf + search_len;
if( skipindex >= end )
return NULL;
// string not in textf
textf += skips[*skipindex];
}
else
{
//Match found
return (char*)textf;
}//End if/else
}//End
for loop
return NULL; // No
sub string found
}//End
search(..)
// This is the function we call. And does all the work on
initialising the
// skip array and doing calling the search function. You could put
it
// all into one function - but I thought it would be better to
break it
// up like this - so you could see how it works more easily.
char
* quick_strstr(char * szstr,
char * szsubstr)
{
// Quick Error Check at the start
if(
(szstr == NULL) ||
(szsubstr==NULL) ||
(strlen(szsubstr) > strlen(szstr))
) return NULL;
// Do our magic search
unsigned char
skips[256];
quick_strstr_init(szsubstr, skips);
char * place = quick_strstr_search( szstr,
szsubstr, skips);
return place;
}//
End quick_strstr(..)
void
main()
{
char textfile[] = "no thing as vaguenono as
something.";
char * place = quick_strstr( textfile, "nono"
);
if( place != NULL )
printf(place);
printf("\nEnter to cont");
scanf("continue");
}//End
main()
|