Java源码示例:org.apache.lucene.search.spell.SpellChecker
示例1
/**
 * Creates the vocabulary and, when a Neo4j location is supplied, builds the spell checker
 * index from the node auto-index found under that location.
 *
 * <p>If {@code neo4jLocation} is {@code null} no spell checker is created and the
 * {@code spellChecker} field is set to {@code null}.
 *
 * @param graph the graph database service stored in this instance
 * @param neo4jLocation root directory of the Neo4j graph, or {@code null}
 * @param curieUtil the CURIE utility stored in this instance
 * @param transformer the node transformer stored in this instance
 * @throws IOException if one of the Lucene directories cannot be opened or indexed
 */
@Inject
public VocabularyNeo4jImpl(GraphDatabaseService graph,
    @Nullable @IndicatesNeo4jGraphLocation String neo4jLocation, CurieUtil curieUtil,
    NodeTransformer transformer) throws IOException {
  this.graph = graph;
  this.curieUtil = curieUtil;
  this.transformer = transformer;
  if (null != neo4jLocation) {
    File graphRoot = new File(neo4jLocation);
    // The source index is only needed while the dictionary is read, so it is closed
    // together with its reader; previously the Directory handle was never released.
    try (Directory indexDirectory =
        FSDirectory.open(new File(graphRoot, "index/lucene/node/node_auto_index").toPath())) {
      // The spell checker keeps using this directory after construction, so it stays open.
      Directory spellDirectory =
          FSDirectory.open(new File(graphRoot, "index/lucene/spellchecker").toPath());
      spellChecker = new SpellChecker(spellDirectory);
      try (IndexReader reader = DirectoryReader.open(indexDirectory)) {
        IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer());
        spellChecker.indexDictionary(new LuceneDictionary(reader, NodeProperties.LABEL
            + LuceneUtils.EXACT_SUFFIX), config, true);
      }
    }
  } else {
    spellChecker = null;
  }
}
示例2
/**
 * Verifies that a spell checker configured with {@link JaroWinklerDistance} as its string
 * distance reports the default dictionary name and exposes that distance implementation
 * once it has been built.
 */
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateDistance() throws Exception {
  final TestSpellChecker checker = new TestSpellChecker();

  // Assemble the spell checker configuration.
  @SuppressWarnings({"rawtypes"})
  final NamedList settings = new NamedList();
  settings.add("classname", IndexBasedSpellChecker.class.getName());
  final File tempIndexDir = createTempDir().toFile();
  settings.add(AbstractLuceneSpellChecker.INDEX_DIR, tempIndexDir.getAbsolutePath());
  settings.add(AbstractLuceneSpellChecker.FIELD, "title");
  settings.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, settings);
  settings.add(AbstractLuceneSpellChecker.STRING_DISTANCE, JaroWinklerDistance.class.getName());

  final SolrCore core = h.getCore();
  final String dictName = checker.init(settings, core);
  final boolean isDefaultName = dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
      isDefaultName);

  h.getCore().withSearcher(searcher -> {
    checker.build(core, searcher);

    final SpellChecker luceneChecker = checker.getSpellChecker();
    assertTrue("sc is null and it shouldn't be", null != luceneChecker);

    final StringDistance distance = luceneChecker.getStringDistance();
    assertTrue("sd is null and it shouldn't be", null != distance);
    assertTrue("sd is not an instance of " + JaroWinklerDistance.class.getName(),
        distance instanceof JaroWinklerDistance);
    return null;
  });
}
示例3
/**
 * Builds an in-memory spell checker from the terms of {@code DRUG_TERMS_FIELD} in the given index.
 *
 * @param index the Lucene directory whose terms are used as the dictionary
 * @return a spell checker with its accuracy set to {@code SPELLCHECK_ACCURACY}
 * @throws IOException if the index cannot be read or the dictionary cannot be indexed
 */
@NotNull
private static SpellChecker createIndexSpellchecker(@NotNull Directory index) throws IOException {
    Directory spellCheckerDirectory = new RAMDirectory();
    SpellChecker spellChecker;
    // The reader is only needed while the dictionary is copied into the spell checker's own
    // directory; closing it here fixes the reader leak of the previous implementation.
    try (IndexReader indexReader = DirectoryReader.open(index)) {
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        Dictionary dictionary = new HighFrequencyDictionary(indexReader, DRUG_TERMS_FIELD, 0.0f);
        spellChecker = new SpellChecker(spellCheckerDirectory);
        spellChecker.indexDictionary(dictionary, config, false);
    }
    spellChecker.setAccuracy(SPELLCHECK_ACCURACY);
    return spellChecker;
}
示例4
/**
 * Creates an analyzer whose token chain is: whitespace tokenizer, the default token filter,
 * the spell-check filter backed by {@code spellChecker}, and finally a filter that
 * concatenates tokens using a single space.
 *
 * @param spellChecker the spell checker handed to the spell-check token filter
 * @return a new anonymous {@link Analyzer}
 */
@NotNull
private static Analyzer spellcheckAnalyzer(@NotNull SpellChecker spellChecker) {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(@NotNull String field) {
            final Tokenizer tokenizer = new WhitespaceTokenizer();
            // NOTE(review): the tokenizer is primed with the value passed as 'field' — confirm
            // whether callers rely on this.
            tokenizer.setReader(new StringReader(field));
            final SpellCheckerTokenFilter corrected =
                    new SpellCheckerTokenFilter(defaultTokenFilter(tokenizer), spellChecker);
            return new TokenStreamComponents(tokenizer, new ConcatenatingFilter(corrected, ' '));
        }
    };
}
示例5
/**
 * Rebuilds the spell checker index in {@code lucene_index/spellcheck} from the
 * {@code NodeDocument.TEXT_FIELD} terms of the {@code NodeDocumentVersion} search index.
 *
 * <p>Failures are logged and not propagated. The spell checker, its directory and the
 * index reader are always released, even when building the index fails.
 *
 * @param searchFactory provides the reader and directory provider of the source index
 */
protected void buildSpellCheckerIndex(SearchFactory searchFactory) {
    IndexReader reader = null;
    Directory dir = null;
    SpellChecker spell = null;
    long startMillis = System.currentTimeMillis();
    File spellCheckIndexDir = new File("lucene_index/spellcheck");
    log.info("Building SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath());
    ReaderProvider readerProvider = searchFactory.getReaderProvider();
    try {
        reader = readerProvider.openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
        dir = FSDirectory.open(spellCheckIndexDir);
        spell = new SpellChecker(dir);
        spell.clearIndex();
        spell.indexDictionary(new LuceneDictionary(reader, NodeDocument.TEXT_FIELD));
        long endMillis = System.currentTimeMillis();
        log.info("Took {1} (ms) to build SpellChecker index in {0}",
                spellCheckIndexDir.getAbsolutePath(), String.valueOf((endMillis - startMillis)));
    } catch (Exception exc) {
        log.error("Failed to build spell checker index!", exc);
    } finally {
        // Close the spell checker before its directory; previously it leaked whenever
        // clearIndex() or indexDictionary() threw.
        if (spell != null) {
            try {
                spell.close();
            } catch (Exception closeExc) {
                log.error("Failed to close spell checker!", closeExc);
            }
        }
        if (dir != null) {
            try {
                dir.close();
            } catch (Exception closeExc) {
                // Best effort: report instead of silently dropping the failure.
                log.error("Failed to close spell checker directory!", closeExc);
            }
        }
        if (reader != null) {
            readerProvider.closeReader(reader);
        }
    }
}
示例6
/**
 * Closes the spell checker cached for {@code indexPath}, if any, and removes its entry from
 * {@code spellCheckMap}. An {@link IOException} raised while closing is logged and the
 * entry is still removed.
 *
 * @param indexPath key of the cached spell checker
 */
public static synchronized void forceSpellCheckerRenewal(String indexPath) {
    final SpellChecker cached = spellCheckMap.get(indexPath);
    try {
        if (cached != null) {
            cached.close();
        }
    } catch (IOException closeFailure) {
        org.webdsl.logging.Logger.error("EXCEPTION", closeFailure);
    }
    spellCheckMap.remove(indexPath);
}
示例7
/**
 * Closes the current spell check searcher and opens a new {@link SpellChecker} on the index
 * at {@code searchModule.getSpellCheckerIndexPath()} with an accuracy of 0.7.
 *
 * <p>If no index exists at that path an error is logged, the opened directory is released
 * and the method returns without creating a spell checker. I/O failures are logged and
 * not propagated.
 *
 * @param indexNewlyBuilt when {@code true}, the spell check files are replaced before the
 *        index is opened and old files are cleaned up afterwards
 */
private void createSpellCheckSearcher(boolean indexNewlyBuilt) {
    try {
        log.info("Create spell checker on new index ...");
        synchronized (createSpellCheckSearcherLock) {// o_clusterOK by:pb if service is only configured on one vm, which is recommended way
            closeSpellCheckSearcher();
            if (indexNewlyBuilt) {
                replaceSpellCheckFiles();
            }
            final File spellDictionaryFile = new File(searchModule.getSpellCheckerIndexPath());
            final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);
            if (!IndexReader.indexExists(spellIndexDirectory)) {
                log.error("SpellChecker index does not exist [" + spellDictionaryFile.getAbsolutePath() + "]");
                // Nothing will use this directory, so release it instead of leaking the handle.
                spellIndexDirectory.close();
                return;
            }
            spellChecker = new SpellChecker(spellIndexDirectory);
            spellChecker.setAccuracy(0.7f);
        }
        if (indexNewlyBuilt) {
            log.info("Cleanup old spell checker index files ...");
            cleanupSpellCheckFiles();
        }
    } catch (IOException ex) {
        log.error("SpellChecker couldn't be created.", ex);
    }
}
示例8
/**
 * Returns the value of the {@code spellChecker} field.
 *
 * @return the current spell checker reference
 */
public SpellChecker getSpellChecker() {
    return this.spellChecker;
}
示例9
/**
 * Returns the value of the {@code spellChecker} field.
 *
 * @return the current spell checker reference
 */
@Override
public SpellChecker getSpellChecker() {
    return this.spellChecker;
}
示例10
/**
 * Creates a filter over {@code tokenStream} and stores {@code spellChecker} in the
 * {@code spellChecker} field.
 *
 * @param tokenStream the upstream token stream passed to the superclass constructor
 * @param spellChecker the spell checker kept by this filter
 */
SpellCheckerTokenFilter(
        @NotNull final TokenStream tokenStream,
        @NotNull final SpellChecker spellChecker) {
    super(tokenStream);
    this.spellChecker = spellChecker;
}
示例11
/**
 * Adds the words of the given document version's text to the spell checker index in
 * {@code lucene_index/spellcheck}.
 *
 * <p>Does nothing beyond logging when {@code nDocVer} is {@code null} or has no text.
 * Failures are logged and not propagated.
 *
 * @param nDocVer the added or updated document version; may be {@code null}
 */
public void updateSpellCheckerIndex(NodeDocumentVersion nDocVer) {
    log.info("Observed Wine added/updated event for {1} from Thread {0}",
            Thread.currentThread().getName(), String.valueOf(nDocVer));
    String text = (nDocVer != null) ? nDocVer.getText() : null;
    if (text != null) {
        Dictionary dictionary = null;
        try {
            FullTextEntityManager ftEm = (FullTextEntityManager) entityManager;
            SearchFactory searchFactory = ftEm.getSearchFactory();
            dictionary = new SetDictionary(text, searchFactory.getAnalyzer("wine_en"));
        } catch (IOException ioExc) {
            // Pass the values as log parameters so {0}/{1}/{2} are actually substituted;
            // they used to be concatenated onto the end of the format string.
            log.error("Failed to analyze dictionary text {0} from Wine {1} to update spell checker due to: {2}",
                    text, nDocVer.getUuid(), ioExc.toString());
        }
        if (dictionary != null) {
            Directory dir = null;
            SpellChecker spell = null;
            // only allow one thread to update the index at a time ...
            // the Dictionary is pre-computed, so it should happen quickly
            // ...
            // this synchronized approach only works because this component
            // is application-scoped
            synchronized (this) {
                try {
                    dir = FSDirectory.open(new File("lucene_index/spellcheck"));
                    spell = new SpellChecker(dir);
                    spell.indexDictionary(dictionary);
                    log.info("Successfully updated the spell checker index after Document added/updated.");
                } catch (Exception exc) {
                    log.error("Failed to update the spell checker index!", exc);
                } finally {
                    // Close the spell checker before its directory, also when indexing failed.
                    if (spell != null) {
                        try {
                            spell.close();
                        } catch (Exception closeExc) {
                            log.error("Failed to close spell checker!", closeExc);
                        }
                    }
                    if (dir != null) {
                        try {
                            dir.close();
                        } catch (Exception closeExc) {
                            // Best effort: report instead of silently dropping the failure.
                            log.error("Failed to close spell checker directory!", closeExc);
                        }
                    }
                }
            }
        }
    }
}
示例12
/**
 * Produces spelling suggestions for every token of {@code toSuggestOn}.
 *
 * <p>The input is tokenized with {@code analyzer}; each token is looked up in the spell
 * checker obtained for {@code baseDir + suggestedField}. Tokens without suggestions are
 * kept as they are. If no token has a suggestion, or if anything fails, an empty list is
 * returned. On failure the spell checker is closed and removed from {@code spellCheckMap}
 * so that it gets renewed.
 *
 * @param entityClass entity whose index reader is used when {@code morePopular} is set
 * @param baseDir prefix of the spell check index path
 * @param suggestedField field name; also appended to {@code baseDir} to form the index path
 * @param maxSuggestionCount maximum number of suggestions requested per token
 * @param accuracy accuracy set on the spell checker
 * @param morePopular when {@code true}, suggestions are restricted using the field's index
 * @param analyzer analyzer used to tokenize {@code toSuggestOn}
 * @param toSuggestOn the text to find suggestions for; {@code null} or empty yields an empty list
 * @return the formed suggestions, or an empty list
 */
@SuppressWarnings("deprecation")
public static ArrayList<String> findSpellSuggestionsForField(Class<?> entityClass, String baseDir,
        String suggestedField, int maxSuggestionCount, float accuracy, boolean morePopular,
        Analyzer analyzer, String toSuggestOn) {
    if (toSuggestOn == null || toSuggestOn.isEmpty())
        return new ArrayList<String>();
    SpellChecker spellChecker = null;
    IndexReader fieldIR = null;
    TokenStream tokenStream = null;
    boolean hasSuggestions = false;
    String indexPath = baseDir+suggestedField;
    try {
        spellChecker = getSpellChecker(indexPath);
        spellChecker.setAccuracy(accuracy);
        tokenStream = analyzer.tokenStream(suggestedField, new StringReader(
                toSuggestOn));
        CharTermAttributeImpl ta = (CharTermAttributeImpl) tokenStream
                .addAttribute(CharTermAttribute.class);
        ArrayList<String[]> allSuggestions = new ArrayList<String[]>();
        String word;
        String[] suggestions;
        while (tokenStream.incrementToken()) {
            word = ta.term();
            suggestions = null;
            if (!morePopular) {
                suggestions = spellChecker.suggestSimilar(word, maxSuggestionCount);
            } else {
                // The field reader is opened lazily, only when popularity is requested.
                if (fieldIR == null)
                    fieldIR = getIndexReader(entityClass);
                suggestions = spellChecker.suggestSimilar(word, maxSuggestionCount, fieldIR,
                        suggestedField, true);
            }
            if (suggestions == null || suggestions.length == 0)
                suggestions = new String[] { word };
            else
                hasSuggestions = true;
            allSuggestions.add(suggestions);
        }
        if (!hasSuggestions)
            // if no suggestions were found, return empty list
            return new ArrayList<String>();
        else
            return formSuggestions(maxSuggestionCount, allSuggestions);
    } catch (Exception e) {
        org.webdsl.logging.Logger.error("EXCEPTION",e);
        //if something goes wrong, close and remove current SpellChecker instance, so it gets renewed
        // spellChecker is still null when getSpellChecker itself failed.
        if (spellChecker != null) {
            try {
                spellChecker.close();
            } catch (IOException e2) {
                org.webdsl.logging.Logger.error("EXCEPTION",e2);
            }
        }
        spellCheckMap.remove(indexPath);
    }
    finally {
        if (tokenStream != null) {
            try {
                tokenStream.close();
            } catch (IOException e3) {
                org.webdsl.logging.Logger.error("EXCEPTION",e3);
            }
        }
        // No reader is opened unless morePopular was requested and a token was seen.
        if (fieldIR != null) {
            searchfactory.getReaderProvider().closeReader(fieldIR);
        }
    }
    return new ArrayList<String>();
}
示例13
/**
 * Creates a new spell-check index based on the search index.
 *
 * <p>When spell checking is enabled, one partial spell index is built per document field
 * (content, title, description, author), the partial indexes are merged into the index at
 * {@code spellDictionaryPath}, and the {@code spellChecker} field is replaced by a checker
 * on the merged index with an accuracy of 0.7. The partial spell checkers, their
 * directories and the source index are closed afterwards. I/O failures are logged and not
 * propagated.
 */
public void createSpellIndex() {
    if (isSpellCheckEnabled) {
        IndexReader indexReader = null;
        Directory indexDir = null;
        try {
            log.info("Start generating Spell-Index...");
            long startSpellIndexTime = 0;
            if (log.isDebugEnabled()) {
                startSpellIndexTime = System.currentTimeMillis();
            }
            indexDir = FSDirectory.open(new File(indexPath));
            indexReader = IndexReader.open(indexDir);
            // 1. Create content spellIndex
            final File spellDictionaryFile = new File(spellDictionaryPath);
            final Directory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH));
            final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
            final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
            contentSpellChecker.indexDictionary(contentDictionary);
            // 2. Create title spellIndex
            final Directory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH));
            final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
            final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
            titleSpellChecker.indexDictionary(titleDictionary);
            // 3. Create description spellIndex
            final Directory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH));
            final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
            final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            descriptionSpellChecker.indexDictionary(descriptionDictionary);
            // 4. Create author spellIndex
            final Directory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH));
            final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
            final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
            authorSpellChecker.indexDictionary(authorDictionary);
            // Merge all part spell indexes (content,title etc.) to one common spell index
            final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);
            final IndexWriter merger = new IndexWriter(spellIndexDirectory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
            final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
            merger.addIndexesNoOptimize(directories);
            merger.optimize();
            merger.close();
            // The partial indexes are no longer needed once merged; release them instead of
            // leaking their handles. The merged directory stays open for the new spell checker.
            contentSpellChecker.close();
            contentSpellIndexDirectory.close();
            titleSpellChecker.close();
            titleSpellIndexDirectory.close();
            descriptionSpellChecker.close();
            descriptionSpellIndexDirectory.close();
            authorSpellChecker.close();
            authorSpellIndexDirectory.close();
            spellChecker = new SpellChecker(spellIndexDirectory);
            spellChecker.setAccuracy(0.7f);
            if (log.isDebugEnabled()) {
                log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
            }
            log.info("New generated Spell-Index ready to use.");
        } catch (final IOException ioEx) {
            log.warn("Can not create SpellIndex", ioEx);
        } finally {
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (final IOException e) {
                    log.warn("Can not close indexReader properly", e);
                }
            }
            // Closing the reader does not close the directory it was opened on.
            if (indexDir != null) {
                try {
                    indexDir.close();
                } catch (final IOException e) {
                    log.warn("Can not close index directory properly", e);
                }
            }
        }
    }
}
示例14
/**
 * Creates a new spell-check index based on the temporary search index.
 *
 * <p>One partial spell index is built per document field (content, title, description,
 * author) from the {@code main} index under the temporary search index path. The partial
 * indexes are merged and optimized into the temporary spell check index, then closed and
 * deleted. The source index reader and its directory are always released. I/O failures are
 * logged and not propagated.
 *
 * @param searchModule supplies the temporary search index and spell check index paths
 */
public static void createSpellIndex(final SearchModule searchModule) {
    final String tempSearchIndexPath = searchModule.getTempSearchIndexPath();
    final String tempSpellCheckIndexPath = searchModule.getTempSpellCheckerIndexPath();
    IndexReader indexReader = null;
    Directory indexDir = null;
    try {
        log.info("Start generating spell check index ...");
        long startSpellIndexTime = 0;
        if (log.isDebugEnabled()) {
            startSpellIndexTime = System.currentTimeMillis();
        }
        indexDir = FSDirectory.open(new File(tempSearchIndexPath, "main"));
        indexReader = IndexReader.open(indexDir);
        // 1. Create content spellIndex
        log.info("Generating 'content' spell check index ...");
        final File contentSpellIndexPath = new File(tempSpellCheckIndexPath + CONTENT_PATH);
        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        final Directory contentSpellIndexDirectory = FSDirectory.open(contentSpellIndexPath);
        final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
        final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
        contentSpellChecker.indexDictionary(contentDictionary);
        // 2. Create title spellIndex
        log.info("Generating 'title' spell check index ...");
        final File titleSpellIndexPath = new File(tempSpellCheckIndexPath + TITLE_PATH);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        final Directory titleSpellIndexDirectory = FSDirectory.open(titleSpellIndexPath);
        final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
        final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
        titleSpellChecker.indexDictionary(titleDictionary);
        // 3. Create description spellIndex
        log.info("Generating 'description' spell check index ...");
        final File descriptionSpellIndexPath = new File(tempSpellCheckIndexPath + DESCRIPTION_PATH);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        final Directory descriptionSpellIndexDirectory = FSDirectory.open(descriptionSpellIndexPath);
        final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
        final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
        descriptionSpellChecker.indexDictionary(descriptionDictionary);
        // 4. Create author spellIndex
        log.info("Generating 'author' spell check index ...");
        final File authorSpellIndexPath = new File(tempSpellCheckIndexPath + AUTHOR_PATH);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);
        final Directory authorSpellIndexDirectory = FSDirectory.open(authorSpellIndexPath);
        final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
        final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
        authorSpellChecker.indexDictionary(authorDictionary);
        log.info("Merging spell check indices ...");
        // Merge all part spell indexes (content,title etc.) to one common spell index
        final File tempSpellCheckIndexDir = new File(tempSpellCheckIndexPath);
        FileUtils.deleteDirsAndFiles(tempSpellCheckIndexDir, true, true);
        final Directory tempSpellIndexDirectory = FSDirectory.open(tempSpellCheckIndexDir);
        final IndexWriter merger = new IndexWriter(tempSpellIndexDirectory, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
        final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
        merger.addIndexesNoOptimize(directories);
        log.info("Optimizing spell check index ...");
        merger.optimize();
        merger.close();
        // Release every partial index before its files are deleted below.
        tempSpellIndexDirectory.close();
        contentSpellChecker.close();
        contentSpellIndexDirectory.close();
        titleSpellChecker.close();
        titleSpellIndexDirectory.close();
        descriptionSpellChecker.close();
        descriptionSpellIndexDirectory.close();
        authorSpellChecker.close();
        authorSpellIndexDirectory.close();
        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);
        if (log.isDebugEnabled()) {
            log.debug("Spell check index created in " + (System.currentTimeMillis() - startSpellIndexTime) + " ms.");
        }
    } catch (final IOException ioEx) {
        log.warn("Can not create spell check index.", ioEx);
    } finally {
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
        // Closing the reader does not close the directory it was opened on; it used to leak.
        if (indexDir != null) {
            try {
                indexDir.close();
            } catch (final IOException e) {
                log.warn("Can not close index directory properly", e);
            }
        }
    }
}