HEX

File: //usr/share/perl5/Plucene/Analysis/StopAnalyzer.pm
package Plucene::Analysis::StopAnalyzer;

=head1 NAME 

Plucene::Analysis::StopAnalyzer - the stop-word analyzer

=head1 SYNOPSIS

	my Plucene::Analysis::StopFilter $sf
		= Plucene::Analysis::StopAnalyzer->tokenstream(@args);

=head1 DESCRIPTION

Filters the output of LowerCaseTokenizer through StopFilter, using a
built-in list of common English stop words.

=head1 METHODS

=cut

use strict;
use warnings;

use Plucene::Analysis::LowerCaseTokenizer;
use Plucene::Analysis::StopFilter;
use base 'Plucene::Analysis::Analyzer';

# Built-in English stop list. tokenstream() hands a reference to this array
# to StopFilter as its "stoplist" argument, so the same list is used for
# every stream this analyzer creates.
my @stopwords = qw(
	a      and   are    as     at    be    but   by
	for    if    in     into   is    it    no    not
	of     on    or     s      such  t     that  the
	their  then  there  these  they  this  to    was
	will   with
);

=head2 tokenstream

	my Plucene::Analysis::StopFilter $sf
		= Plucene::Analysis::StopAnalyzer->tokenstream(@args);

Filters LowerCaseTokenizer with StopFilter.

=cut

sub tokenstream {
	my ($self, @tokenizer_args) = @_;

	# The invocant is not consulted; every remaining argument is forwarded
	# untouched to the LowerCaseTokenizer constructor. The tokenizer is built
	# inside the list below so its constructor sees the same (list) context
	# as it would inside an anonymous hash.
	my %filter_args = (
		input    => Plucene::Analysis::LowerCaseTokenizer->new(@tokenizer_args),
		stoplist => \@stopwords,
	);

	# Wrap the tokenizer in a StopFilter configured with the built-in list.
	return Plucene::Analysis::StopFilter->new(\%filter_args);
}

1;