Java source code examples: org.apache.lucene.analysis.CachingTokenFilter
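
CachingTokenFilter wraps a TokenStream, records the wrapped stream's tokens the first time it is consumed, and replays them from its cache on every later reset(). A minimal sketch of that basic pattern (the analyzer, field name and text are placeholders, not taken from the examples below):

static void demoCachingTokenFilter() throws IOException {
  Analyzer analyzer = new WhitespaceAnalyzer();
  try (TokenStream source = analyzer.tokenStream("field", "Some Query Text")) {
    CachingTokenFilter cached = new CachingTokenFilter(source);
    CharTermAttribute termAtt = cached.addAttribute(CharTermAttribute.class);

    cached.reset();                    // the first reset is propagated to the wrapped stream
    while (cached.incrementToken()) {  // the first pass consumes the input and fills the cache
      System.out.println("pass 1: " + termAtt);
    }
    cached.end();

    cached.reset();                    // later resets rewind to the start of the cache
    while (cached.incrementToken()) {  // the second pass replays the cached tokens
      System.out.println("pass 2: " + termAtt);
    }
    cached.end();
  }
}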

Example 1
public void testMultipleSources() throws Exception {
  final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(whitespaceMockTokenizer(buffer1.toString()));
  // source1 is cached so it can be fully consumed below and then consumed again through LowerCaseFilter
  final TokenStream source1 = new CachingTokenFilter(tee1);

  tee1.addAttribute(CheckClearAttributesAttribute.class);

  MockTokenizer tokenizer = new MockTokenizer(tee1.getAttributeFactory(), MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(new StringReader(buffer2.toString()));
  final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(tokenizer);
  final TokenStream source2 = tee2;

  assertTokenStreamContents(source1, tokens1);
  assertTokenStreamContents(source2, tokens2);

  // Re-reading source1 works because CachingTokenFilter replays its cached tokens on the next reset()
  TokenStream lowerCasing = new LowerCaseFilter(source1);
  String[] lowerCaseTokens = new String[tokens1.length];
  for (int i = 0; i < tokens1.length; i++) {
    lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
  }
  assertTokenStreamContents(lowerCasing, lowerCaseTokens);
}
 
Example 2
protected Query doToQuery(QueryShardContext context) throws IOException {
    // Analyzer analyzer = context.getMapperService().searchAnalyzer();
    Analyzer analyzer = new WhitespaceAnalyzer();
    try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
        // Cache the lower-cased stream so it can be consumed to build the span clauses.
        CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
        TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        if (termAtt == null) {
            return null;
        }
        List<CustomSpanTermQuery> clauses = new ArrayList<>();
        stream.reset();
        while (stream.incrementToken()) {
            Term term = new Term(fieldName, termAtt.getBytesRef());
            clauses.add(new CustomSpanTermQuery(term));
        }
        return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing query text", e);
    }
}
 
Example 3
public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  Document doc = new Document();
  // The analyzed tokens are cached so the same stream can be consumed again when the field is added to the document twice below
  try (TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", "abcd   "))) {
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(true);
    Field f = new Field("field", stream, customType);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
  }
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator();
  assertNotNull(termsEnum.next());
  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertEquals(2, termsEnum.totalTermFreq());

  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(12, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  r.close();
  dir.close();
}
 
Example 4
private static boolean hasGaps(CachingTokenFilter stream) throws IOException {
    PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        // A position increment greater than 1 means the analyzer dropped a token (e.g. a stop word), leaving a gap
        if (posIncAtt.getPositionIncrement() > 1) {
            return true;
        }
    }
    return false;
}
 
Example 5
private static CachingTokenFilter cache(TokenStream in) {
    // Avoid double-wrapping a stream that is already cached
    if (in instanceof CachingTokenFilter) {
        return (CachingTokenFilter) in;
    }
    return new CachingTokenFilter(in);
}
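
The cache(...) helper above and the hasGaps(...) helper from Example 4 are typically used together: the analyzed query text is cached once, scanned for positional gaps, then rewound and replayed to build the actual query. A hypothetical caller (the method name is made up; it assumes the analyzer's token stream exposes PositionIncrementAttribute, as e.g. StandardAnalyzer's does) might look like this:

static boolean analyzedTextHasGaps(Analyzer analyzer, String field, String text) throws IOException {
  try (TokenStream source = analyzer.tokenStream(field, text)) {
    CachingTokenFilter stream = cache(source); // reuse an existing CachingTokenFilter if there is one
    boolean gaps = hasGaps(stream);            // the first pass fills the cache
    stream.reset();                            // rewind to the start of the cached tokens
    // ... a second pass over the cached tokens could now build a phrase or span query
    return gaps;
  }
}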