Aho–Corasick algorithm is a string-searching algorithm invented by Alfred V. Aho and Margaret J. Corasick. It is a kind of dictionary-matching algorithm that locates elements of a finite set of strings within an input text. It matches all strings simultaneously.
var AhoCorasick = function(keywords)
{
this._buildTables(keywords);
};
AhoCorasick.prototype._buildTables = function(keywords)
{
var gotoFn =
{
0: {}
};
var output = {};
var state = 0;
keywords.forEach(function(word)
{
var curr = 0;
for (var i=0; i<word.length; i++)
{
var l = word[i];
if (gotoFn[curr] && l in gotoFn[curr])
{
curr = gotoFn[curr][l];
}
else
{
state++;
gotoFn[curr][l] = state;
gotoFn[state] = {};
curr = state;
output[state] = [];
}
}
output[curr].push(word);
});
var failure = {};
var xs = [];
for (var l in gotoFn[0])
{
var state = gotoFn[0][l];
failure[state] = 0;
xs.push(state);
}
while (xs.length)
{
var r = xs.shift();
for (var l in gotoFn[r])
{
var s = gotoFn[r][l];
xs.push(s);
var state = failure[r];
while(state > 0 && !(l in gotoFn[state]))
{
state = failure[state];
}
if (l in gotoFn[state])
{
var fs = gotoFn[state][l];
failure[s] = fs;
output[s] = output[s].concat(output[fs]);
}
else
{
failure[s] = 0;
}
}
}
this.gotoFn = gotoFn;
this.output = output;
this.failure = failure;
};
AhoCorasick.prototype.search = function(string)
{
var state = 0;
var results = [];
for (var i=0; i<string.length; i++)
{
var l = string[i];
while (state > 0 && !(l in this.gotoFn[state]))
{
state = this.failure[state];
}
if (!(l in this.gotoFn[state]))
{
continue;
}
state = this.gotoFn[state][l];
if (this.output[state].length)
{
var foundStrs = this.output[state];
results.push([i, foundStrs]);
}
}
return results;
};