Java source code examples: org.apache.lucene.analysis.CachingTokenFilter
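CachingTokenFilter wraps another TokenStream, records every token (plus the stream's final state) the first time it is consumed, and replays that recording on every later reset(), so a single analysis pass can be read more than once without re-running the tokenizer. Before the collected examples, here is a minimal, self-contained sketch of that replay behavior; the field name and sample text are placeholders, and it assumes a recent Lucene where WhitespaceAnalyzer takes no constructor arguments:
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CachingTokenFilterDemo {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream("field", "Hello Lucene")) {
      CachingTokenFilter cached = new CachingTokenFilter(source);
      CharTermAttribute termAtt = cached.addAttribute(CharTermAttribute.class);
      // First pass: reset() is forwarded to the wrapped stream, and the first
      // incrementToken() consumes it completely into the internal cache.
      cached.reset();
      while (cached.incrementToken()) {
        System.out.println("pass 1: " + termAtt);
      }
      // Second pass: reset() merely rewinds the cache iterator; the
      // underlying tokenizer is not run again.
      cached.reset();
      while (cached.incrementToken()) {
        System.out.println("pass 2: " + termAtt);
      }
      cached.end(); // restores the cached final state (end offset etc.)
    }
  }
}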
Example 1: verify a cached stream's contents, then re-consume the same stream through a LowerCaseFilter
public void testMultipleSources() throws Exception {
  // buffer1/buffer2 hold the input text and tokens1/tokens2 the expected
  // tokens; they are fixtures defined elsewhere in the test class.
  final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(whitespaceMockTokenizer(buffer1.toString()));
  final TokenStream source1 = new CachingTokenFilter(tee1);

  tee1.addAttribute(CheckClearAttributesAttribute.class);

  MockTokenizer tokenizer = new MockTokenizer(tee1.getAttributeFactory(), MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(new StringReader(buffer2.toString()));
  final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(tokenizer);
  final TokenStream source2 = tee2;

  assertTokenStreamContents(source1, tokens1);
  assertTokenStreamContents(source2, tokens2);

  // source1 has already been fully consumed once; only the caching wrapper
  // allows it to feed a second chain here.
  TokenStream lowerCasing = new LowerCaseFilter(source1);
  String[] lowerCaseTokens = new String[tokens1.length];
  for (int i = 0; i < tokens1.length; i++) {
    lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
  }
  assertTokenStreamContents(lowerCasing, lowerCaseTokens);
}
Example 2: replay cached, lower-cased tokens to build span-term clauses for a phrase-style query
protected Query doToQuery(QueryShardContext context) throws IOException {
  // Analyzer analyzer = context.getMapperService().searchAnalyzer();
  Analyzer analyzer = new WhitespaceAnalyzer();
  try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
    CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    if (termAtt == null) {
      return null;
    }
    // One span-term clause per analyzed token, in order.
    List<CustomSpanTermQuery> clauses = new ArrayList<>();
    stream.reset();
    while (stream.incrementToken()) {
      Term term = new Term(fieldName, termAtt.getBytesRef());
      clauses.add(new CustomSpanTermQuery(term));
    }
    return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]),
        slop, inOrder, weightedCount);
  } catch (IOException e) {
    throw new RuntimeException("Error analyzing query text", e);
  }
}
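Two details are worth noting here: getAttribute() (unlike addAttribute()) returns null when no TermToBytesRefAttribute is registered on the chain, which serves as the early exit, and the CachingTokenFilter is reset() exactly once before the consuming loop; a later consumer could reset() again and replay the same cached tokens without re-analyzing the text.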
Example 3: index one cached stream into the same document twice and check the term-vector positions and offsets
public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  Document doc = new Document();
  // Note the three trailing spaces: they push the final offset to 7,
  // which the offset assertions below depend on.
  try (TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", "abcd   "))) {
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(true);
    Field f = new Field("field", stream, customType);
    // The same cached stream backs both field instances.
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
  }
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator();
  assertNotNull(termsEnum.next());
  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertEquals(2, termsEnum.totalTermFreq());

  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(12, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  r.close();
  dir.close();
}
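The expected offsets follow from the input: the first field instance yields "abcd" at offsets 0-4 with a final offset of 7 (the string length including the trailing spaces), and the analyzer's default offset gap of 1 then shifts the second instance's token to 8-12. The CachingTokenFilter is what makes adding f twice possible at all, since a plain token stream cannot be consumed a second time.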
Example 4: scan a cached stream for position-increment gaps (e.g. tokens removed by a StopFilter)
private static boolean hasGaps(CachingTokenFilter stream) throws IOException {
  PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {
    // A position increment greater than 1 means a hole was left in the
    // token sequence, e.g. where a stopword was removed.
    if (posIncAtt.getPositionIncrement() > 1) {
      return true;
    }
  }
  return false;
}
Example 5: wrap a stream in a CachingTokenFilter only if it is not already cached
private static CachingTokenFilter cache(TokenStream in) {
  // Idempotent: avoid stacking a second cache on an already cached stream.
  if (in instanceof CachingTokenFilter) {
    return (CachingTokenFilter) in;
  }
  return new CachingTokenFilter(in);
}
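The two helpers above pair naturally: cache the stream once, run a cheap pre-scan such as hasGaps(), then reset() and replay the cached tokens for the real work. A hypothetical caller might look like the sketch below (the analyzer, field name, and sample text are placeholders; the helpers from Examples 4 and 5 are repeated so the class compiles on its own):
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class CacheThenReplayDemo {

  // Example 5's idempotent wrapper.
  private static CachingTokenFilter cache(TokenStream in) {
    return in instanceof CachingTokenFilter ? (CachingTokenFilter) in : new CachingTokenFilter(in);
  }

  // Example 4's gap check.
  private static boolean hasGaps(CachingTokenFilter stream) throws IOException {
    PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      if (posIncAtt.getPositionIncrement() > 1) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream("body", "quick brown fox")) {
      // A bare whitespace chain does not register PositionIncrementAttribute,
      // so add it before hasGaps() looks it up; with no stopword filtering
      // every increment is 1 and no gaps are reported.
      source.addAttribute(PositionIncrementAttribute.class);
      CachingTokenFilter stream = cache(source);
      System.out.println("has gaps: " + hasGaps(stream)); // first pass fills the cache
      CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
      stream.reset(); // rewinds the cache; the tokenizer does not run again
      while (stream.incrementToken()) {
        System.out.println(termAtt);
      }
      stream.end(); // restores the cached final state
    }
  }
}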