Java源码示例:opennlp.tools.util.Sequence
示例1
/**
 * Demonstrates part-of-speech tagging with OpenNLP: lower-cases the
 * sentence, tokenizes it with a {@link Scanner}, tags each token, and
 * prints the top-k tag sequences.
 *
 * NOTE(review): relies on {@code sentence} (a String) and {@code out}
 * being defined elsewhere in the enclosing class.
 */
public void POSExample() {
    // Both the model stream and the Scanner are closed automatically.
    try (InputStream input = new FileInputStream(new File("en-pos-maxent.bin"));
         Scanner scanner = new Scanner(sentence)) {
        // To lower case example
        String lowerCaseVersion = sentence.toLowerCase();
        out.println(lowerCaseVersion);
        // Pull out whitespace-delimited tokens
        List<String> list = new ArrayList<>();
        while (scanner.hasNext()) {
            list.add(scanner.next());
        }
        // Convert list to an array. FIX: the original seeded toArray with a
        // size-1 array, which leaves a trailing null element when the
        // sentence has no tokens; new String[0] always yields an
        // exactly-sized result array.
        String[] words = list.toArray(new String[0]);
        // Build model
        POSModel posModel = new POSModel(input);
        POSTaggerME posTagger = new POSTaggerME(posModel);
        // Tag words
        String[] posTags = posTagger.tag(words);
        for (int i = 0; i < posTags.length; i++) {
            out.println(words[i] + " - " + posTags[i]);
        }
        // Find top sequences
        Sequence[] sequences = posTagger.topKSequences(words);
        for (Sequence sequence : sequences) {
            out.println(sequence);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
示例2
/**
 * Generates a specified number of lemma classes for the input tokens
 * and tags.
 *
 * @param numTaggings the number of analyses to produce
 * @param toks the sentence tokens
 * @param tags the sentence tags
 * @return the specified number of lemma classes for the input tokens,
 *         one String[] of outcomes per predicted sequence
 */
public String[][] lemmatize(int numTaggings, String[] toks, String[] tags) {
    Sequence[] bestSequences = model.bestSequences(numTaggings, toks,
        new Object[] { tags }, contextGenerator, sequenceValidator);
    String[][] lemmaClasses = new String[bestSequences.length][];
    for (int i = 0; i < lemmaClasses.length; i++) {
        List<String> outcomes = bestSequences[i].getOutcomes();
        // toArray(new String[0]) is the preferred idiom: the runtime
        // allocates an exactly-sized array, and it is at least as fast
        // as pre-sizing with outcomes.size().
        lemmaClasses[i] = outcomes.toArray(new String[0]);
    }
    return lemmaClasses;
}
示例3
/**
 * Demonstrates tagging a pre-tokenized sentence with a {@code POSModel}
 * and printing the top-k tag sequences together with the per-token
 * probability of each predicted tag.
 *
 * NOTE(review): relies on {@code sentence} (a String[]) and
 * {@code getModelDir()} being defined elsewhere in the enclosing class.
 */
private static void usingOpenNLPPOSModel() {
    System.out.println("OpenNLP POSModel Examples");
    try (InputStream modelIn = new FileInputStream(
            new File(getModelDir(), "en-pos-maxent.bin"))) {
        POSModel model = new POSModel(modelIn);
        POSTaggerME tagger = new POSTaggerME(model);
        // Introduction sentences
        // sentence = tokenizeSentence("The cow jumped over the moon.");
        // sentence = tokenizeSentence("Bill used the force to force the manger to tear the bill in two.");
        // sentence = tokenizeSentence("AFAIK she H8 cth!");
        // sentence = tokenizeSentence("BTW had a GR8 tym at the party BBIAM.");
        // sentence = tokenizeSentence("Whether \"Blue\" was correct or not (it’s not) is debatable");
        // FIX: dropped the unused local 'double probs[] = tagger.probs();'
        // from the original — its value was never read.
        String[] tags = tagger.tag(sentence);
        for (int i = 0; i < sentence.length; i++) {
            System.out.print(sentence[i] + "/" + tags[i] + " ");
        }
        System.out.println();
        // Use import opennlp.tools.util.Sequence; instead of
        // import opennlp.model.Sequence
        System.out.println("topSequences");
        Sequence[] topSequences = tagger.topKSequences(sentence);
        for (int i = 0; i < topSequences.length; i++) {
            System.out.println(topSequences[i]);
        }
        System.out.println();
        System.out.println("occurrences and probabilities");
        for (int i = 0; i < topSequences.length; i++) {
            List<String> outcomes = topSequences[i].getOutcomes();
            double[] probabilities = topSequences[i].getProbs();
            for (int j = 0; j < outcomes.size(); j++) {
                System.out.printf("%s/%5.3f ", outcomes.get(j), probabilities[j]);
            }
            System.out.println();
        }
        System.out.println();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
示例4
@Override
public void predict(RecommenderContext aContext, CAS aCas)
throws RecommendationException
{
POSModel model = aContext.get(KEY_MODEL).orElseThrow(() ->
new RecommendationException("Key [" + KEY_MODEL + "] not found in context"));
POSTaggerME tagger = new POSTaggerME(model);
Type sentenceType = getType(aCas, Sentence.class);
Type predictedType = getPredictedType(aCas);
Type tokenType = getType(aCas, Token.class);
Feature scoreFeature = getScoreFeature(aCas);
Feature predictedFeature = getPredictedFeature(aCas);
Feature isPredictionFeature = getIsPredictionFeature(aCas);
int predictionCount = 0;
for (AnnotationFS sentence : select(aCas, sentenceType)) {
if (predictionCount >= traits.getPredictionLimit()) {
break;
}
predictionCount++;
List<AnnotationFS> tokenAnnotations = selectCovered(tokenType, sentence);
String[] tokens = tokenAnnotations.stream()
.map(AnnotationFS::getCoveredText)
.toArray(String[]::new);
Sequence[] bestSequences = tagger.topKSequences(tokens);
// LOG.debug("Total number of sequences predicted: {}", bestSequences.length);
for (int s = 0; s < Math.min(bestSequences.length, maxRecommendations); s++) {
Sequence sequence = bestSequences[s];
List<String> outcomes = sequence.getOutcomes();
double[] probabilities = sequence.getProbs();
// LOG.debug("Sequence {} score {}", s, sequence.getScore());
// LOG.debug("Outcomes: {}", outcomes);
// LOG.debug("Probabilities: {}", asList(probabilities));
for (int i = 0; i < outcomes.size(); i++) {
String label = outcomes.get(i);
// Do not return PADded tokens
if (PAD.equals(label)) {
continue;
}
AnnotationFS token = tokenAnnotations.get(i);
int begin = token.getBegin();
int end = token.getEnd();
double confidence = probabilities[i];
// Create the prediction
AnnotationFS annotation = aCas.createAnnotation(predictedType, begin, end);
annotation.setStringValue(predictedFeature, label);
annotation.setDoubleValue(scoreFeature, confidence);
annotation.setBooleanValue(isPredictionFeature, true);
aCas.addFsToIndexes(annotation);
}
}
}
}
示例5
/**
 * Computes the {@code DEFAULT_BEAM_SIZE} best tag sequences for the
 * sentence, passing the supplied tags as additional prediction context.
 *
 * @param sentence the sentence tokens
 * @param tags the tags used as additional context
 * @return the best-scoring sequences found by the beam search
 */
public Sequence[] topKSequences(String[] sentence, String[] tags) {
    Object[] additionalContext = { tags };
    return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, additionalContext,
        contextGenerator, sequenceValidator);
}
示例6
/**
 * Computes the {@code DEFAULT_BEAM_SIZE} best tag sequences for the
 * sentence, discarding any sequence scoring below the given threshold.
 *
 * @param sentence the sentence tokens
 * @param tags the tags used as additional context
 * @param minSequenceScore minimum score a sequence must reach to be returned
 * @return the best-scoring sequences that meet the score threshold
 */
public Sequence[] topKSequences(String[] sentence, String[] tags,
    double minSequenceScore) {
    Object[] additionalContext = { tags };
    return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, additionalContext,
        minSequenceScore, contextGenerator, sequenceValidator);
}
示例7
/**
 * Computes the {@code DEFAULT_BEAM_SIZE} best lemma-class sequences for
 * the sentence, passing the supplied tags as additional prediction context.
 *
 * @param sentence the sentence tokens
 * @param tags the tags used as additional context
 * @return the best-scoring sequences found by the beam search
 */
public Sequence[] topKLemmaClasses(String[] sentence, String[] tags) {
    Object[] additionalContext = { tags };
    return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, additionalContext,
        contextGenerator, sequenceValidator);
}
示例8
/**
 * Computes the {@code DEFAULT_BEAM_SIZE} best lemma-class sequences for
 * the sentence, discarding any sequence scoring below the given threshold.
 *
 * @param sentence the sentence tokens
 * @param tags the tags used as additional context
 * @param minSequenceScore minimum score a sequence must reach to be returned
 * @return the best-scoring sequences that meet the score threshold
 */
public Sequence[] topKLemmaClasses(String[] sentence, String[] tags,
    double minSequenceScore) {
    Object[] additionalContext = { tags };
    return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, additionalContext,
        minSequenceScore, contextGenerator, sequenceValidator);
}