Java源码示例:opennlp.tools.util.Sequence

示例1
/**
 * Demonstrates POS tagging with OpenNLP: lower-cases a sentence, tokenizes it,
 * tags each token, and prints the top-k tag sequences.
 * Assumes {@code sentence} and {@code out} are in scope (e.g. a field and
 * {@code System.out}) — TODO confirm against the enclosing class.
 */
public void POSExample() {
    // try-with-resources closes the model stream on all paths.
    try (InputStream input = new FileInputStream(new File("en-pos-maxent.bin"))) {

        // To lower case example
        String lowerCaseVersion = sentence.toLowerCase();
        out.println(lowerCaseVersion);

        // Pull out whitespace-delimited tokens. Close the Scanner explicitly
        // (the original leaked it).
        List<String> list = new ArrayList<>();
        try (Scanner scanner = new Scanner(sentence)) {
            while (scanner.hasNext()) {
                list.add(scanner.next());
            }
        }
        // Convert list to an array. Passing new String[0] lets toArray size the
        // result correctly; the original new String[1] would leave a stray null
        // element when the list is empty.
        String[] words = list.toArray(new String[0]);

        // Build the tagger from the model.
        POSModel posModel = new POSModel(input);
        POSTaggerME posTagger = new POSTaggerME(posModel);

        // Tag each token and print token/tag pairs.
        String[] posTags = posTagger.tag(words);
        for (int i = 0; i < posTags.length; i++) {
            out.println(words[i] + " - " + posTags[i]);
        }

        // Find and print the top-scoring tag sequences.
        Sequence[] sequences = posTagger.topKSequences(words);
        for (Sequence sequence : sequences) {
            out.println(sequence);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
示例2
/**
 * Generates the requested number of lemma-class analyses for the given
 * tokens and POS tags.
 *
 * @param numTaggings the number of alternative analyses to return
 * @param toks the sentence tokens
 * @param tags the sentence POS tags
 * @return one lemma-class array per predicted sequence
 */
public String[][] lemmatize(int numTaggings, String[] toks, String[] tags) {
  // The tags are passed to the beam search as additional context.
  Sequence[] sequences = model.bestSequences(numTaggings, toks,
      new Object[] { tags }, contextGenerator, sequenceValidator);
  String[][] result = new String[sequences.length][];
  for (int i = 0; i < sequences.length; i++) {
    List<String> outcomes = sequences[i].getOutcomes();
    result[i] = outcomes.toArray(new String[0]);
  }
  return result;
}
 
示例3
/**
 * Demonstrates the OpenNLP {@code POSModel}/{@code POSTaggerME} API: tags a
 * tokenized sentence, then prints the top-k tag sequences with per-token
 * probabilities. Assumes a {@code sentence} token array and
 * {@code getModelDir()} are in scope — TODO confirm against enclosing class.
 */
private static void usingOpenNLPPOSModel() {
        System.out.println("OpenNLP POSModel Examples");
        // try-with-resources closes the model stream on all paths.
        try (InputStream modelIn = new FileInputStream(
                new File(getModelDir(), "en-pos-maxent.bin"))) {
            POSModel model = new POSModel(modelIn);
            POSTaggerME tagger = new POSTaggerME(model);

            // Alternative introduction sentences to experiment with:
//            sentence = tokenizeSentence("The cow jumped over the moon.");
//            sentence = tokenizeSentence("Bill used the force to force the manger to tear the bill in two.");
//            sentence = tokenizeSentence("AFAIK she H8 cth!");
//            sentence = tokenizeSentence("BTW had a GR8 tym at the party BBIAM.");
//            sentence = tokenizeSentence("Whether \"Blue\" was correct or not (it’s not) is debatable");
            String[] tags = tagger.tag(sentence);
            // Presumably per-token probabilities for the tag() call above —
            // kept here to show the API; verify against the OpenNLP docs.
            double[] probs = tagger.probs();

            for (int i = 0; i < sentence.length; i++) {
                System.out.print(sentence[i] + "/" + tags[i] + " ");
            }
            System.out.println();
            // Use import opennlp.tools.util.Sequence; instead of
            // import opennlp.model.Sequence
            System.out.println("topSequences");
            Sequence[] topSequences = tagger.topKSequences(sentence);
            for (int i = 0; i < topSequences.length; i++) {
                System.out.println(topSequences[i]);
            }
            System.out.println();

            System.out.println("occurrences and probabilities");
            for (int i = 0; i < topSequences.length; i++) {
                List<String> outcomes = topSequences[i].getOutcomes();
                double[] probabilities = topSequences[i].getProbs();
                for (int j = 0; j < outcomes.size(); j++) {
                    System.out.printf("%s/%5.3f ", outcomes.get(j), probabilities[j]);
                }
                System.out.println();
            }
            System.out.println();

            // Getting the dictionary tags:
//            POSTaggerFactory ptf = model.getFactory();
//            TagDictionary tagDictionary = ptf.getTagDictionary();
//            String dictionaryTags[] = tagDictionary.getTags("the");
//            System.out.println(dictionaryTags.length);
//            for(String word : dictionaryTags) {
//                 System.out.println(word);
//            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
 
示例4
/**
 * Runs the OpenNLP POS tagger over each sentence in the CAS and records the
 * top tag sequences as prediction annotations, up to the configured limits.
 *
 * @param aContext recommender context holding the trained model under KEY_MODEL
 * @param aCas the CAS to annotate
 * @throws RecommendationException if no model is present in the context
 */
@Override
    public void predict(RecommenderContext aContext, CAS aCas)
        throws RecommendationException
    {
        // Fail fast if the trained model is missing from the context.
        POSModel posModel = aContext.get(KEY_MODEL).orElseThrow(() ->
                new RecommendationException("Key [" + KEY_MODEL + "] not found in context"));

        POSTaggerME posTagger = new POSTaggerME(posModel);

        Type sentenceType = getType(aCas, Sentence.class);
        Type predictedType = getPredictedType(aCas);
        Type tokenType = getType(aCas, Token.class);

        Feature scoreFeature = getScoreFeature(aCas);
        Feature predictedFeature = getPredictedFeature(aCas);
        Feature isPredictionFeature = getIsPredictionFeature(aCas);

        int sentencesSeen = 0;
        for (AnnotationFS sentence : select(aCas, sentenceType)) {
            // Stop once the configured prediction limit has been reached.
            if (sentencesSeen >= traits.getPredictionLimit()) {
                break;
            }
            sentencesSeen++;

            // Collect the covered text of each token in this sentence.
            List<AnnotationFS> tokenAnnotations = selectCovered(tokenType, sentence);
            String[] tokens = new String[tokenAnnotations.size()];
            for (int t = 0; t < tokens.length; t++) {
                tokens[t] = tokenAnnotations.get(t).getCoveredText();
            }

            Sequence[] bestSequences = posTagger.topKSequences(tokens);

            // Emit at most maxRecommendations sequences per sentence.
            int sequenceLimit = Math.min(bestSequences.length, maxRecommendations);
            for (int s = 0; s < sequenceLimit; s++) {
                Sequence sequence = bestSequences[s];
                List<String> outcomes = sequence.getOutcomes();
                double[] probabilities = sequence.getProbs();

                for (int i = 0; i < outcomes.size(); i++) {
                    String label = outcomes.get(i);

                    // Do not return PADded tokens
                    if (PAD.equals(label)) {
                        continue;
                    }

                    AnnotationFS token = tokenAnnotations.get(i);

                    // Create the prediction annotation over the token span.
                    AnnotationFS annotation = aCas.createAnnotation(predictedType,
                            token.getBegin(), token.getEnd());
                    annotation.setStringValue(predictedFeature, label);
                    annotation.setDoubleValue(scoreFeature, probabilities[i]);
                    annotation.setBooleanValue(isPredictionFeature, true);
                    aCas.addFsToIndexes(annotation);
                }
            }
        }
    }
 
示例5
/**
 * Returns the best tag sequences for the given sentence, using the default
 * beam size and the supplied tags as additional context.
 *
 * @param sentence the sentence tokens
 * @param tags the tags passed to the beam search as additional context
 * @return the top-scoring sequences
 */
public Sequence[] topKSequences(String[] sentence, String[] tags) {
  Object[] additionalContext = { tags };
  return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, additionalContext,
      contextGenerator, sequenceValidator);
}
 
示例6
/**
 * Returns the best tag sequences for the given sentence whose score is at
 * least {@code minSequenceScore}, using the default beam size.
 *
 * @param sentence the sentence tokens
 * @param tags the tags passed to the beam search as additional context
 * @param minSequenceScore the minimum score a sequence must reach
 * @return the top-scoring sequences above the threshold
 */
public Sequence[] topKSequences(String[] sentence, String[] tags,
    double minSequenceScore) {
  Object[] additionalContext = { tags };
  return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, additionalContext,
      minSequenceScore, contextGenerator, sequenceValidator);
}
 
示例7
/**
 * Returns the best lemma-class sequences for the given sentence, using the
 * default beam size and the supplied tags as additional context.
 *
 * @param sentence the sentence tokens
 * @param tags the tags passed to the beam search as additional context
 * @return the top-scoring lemma-class sequences
 */
public Sequence[] topKLemmaClasses(String[] sentence, String[] tags) {
  Object[] additionalContext = { tags };
  return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, additionalContext,
      contextGenerator, sequenceValidator);
}
 
示例8
/**
 * Returns the best lemma-class sequences for the given sentence whose score
 * is at least {@code minSequenceScore}, using the default beam size.
 *
 * @param sentence the sentence tokens
 * @param tags the tags passed to the beam search as additional context
 * @param minSequenceScore the minimum score a sequence must reach
 * @return the top-scoring lemma-class sequences above the threshold
 */
public Sequence[] topKLemmaClasses(String[] sentence, String[] tags,
    double minSequenceScore) {
  Object[] additionalContext = { tags };
  return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, additionalContext,
      minSequenceScore, contextGenerator, sequenceValidator);
}