Java源码示例:opennlp.tools.parser.Parse
示例1
/**
 * Collects the flat noun phrases contained in a parse tree.
 *
 * <p>The tree is visited one level at a time. A node whose type is {@code "NP"} and for
 * which {@code isFlat()} is true is collected and not expanded further; any other node
 * that is not a POS tag contributes its children to the next level.
 *
 * @param parse root of the tree to search
 * @return the collected nodes, in the order in which they were visited
 */
private Parse[] getNounPhrases(Parse parse) {
    List<Parse> nounPhrases = new ArrayList<Parse>(10);
    List<Parse> currentLevel = new ArrayList<Parse>();
    currentLevel.add(parse);
    while (!currentLevel.isEmpty()) {
        List<Parse> nextLevel = new ArrayList<Parse>();
        for (Parse node : currentLevel) {
            boolean flatNounPhrase = node.getType().equals("NP") && node.isFlat();
            if (flatNounPhrase) {
                nounPhrases.add(node);
                continue;
            }
            if (!node.isPosTag()) {
                // Not a match and not a POS tag: look at its children on the next pass.
                nextLevel.addAll(Arrays.asList(node.getChildren()));
            }
        }
        currentLevel = nextLevel;
    }
    return nounPhrases.toArray(new Parse[nounPhrases.size()]);
}
示例2
/**
 * Extracts the subject string from a parse tree.
 *
 * <p>A {@code LABEL_TOP} node delegates to its first child. A {@code LABEL_SENTENCE}
 * node delegates to its first {@code LABEL_NOUN_PHRASE} child, if it has one. A
 * {@code LABEL_NOUN_PHRASE} node is resolved through
 * {@code getFirstOccurenceForType(parse, LABEL_NAME_PREFIX)}.
 *
 * @param parse node to inspect
 * @return the extracted subject, or the empty string when no rule applies
 */
public static String getSubject(final Parse parse) {
    final String nodeType = parse.getType();

    if (nodeType.equals(LABEL_TOP)) {
        final Parse firstChild = parse.getChildren()[0];
        return getSubject(firstChild);
    }

    if (nodeType.equals(LABEL_SENTENCE)) {
        final Parse[] children = parse.getChildren();
        for (int i = 0; i < children.length; i++) {
            if (children[i].getType().equals(LABEL_NOUN_PHRASE)) {
                return getSubject(children[i]);
            }
        }
        // No noun-phrase child: fall through to the remaining checks.
    }

    return nodeType.equals(LABEL_NOUN_PHRASE)
            ? getFirstOccurenceForType(parse, LABEL_NAME_PREFIX)
            : "";
}
示例3
/**
 * Extracts the predicate string from a parse tree.
 *
 * <p>A {@code LABEL_TOP} node delegates to its first child. A {@code LABEL_SENTENCE}
 * node delegates to its first {@code LABEL_VERBAL_PHRASE} child and yields the empty
 * string when it has none. A {@code LABEL_VERBAL_PHRASE} node is resolved through
 * {@code getFirstOccurenceForType(parse, LABEL_VERB_PREFIX)}.
 *
 * @param parse node to inspect
 * @return the extracted predicate, or the empty string when no rule applies
 */
public static String getPredicate(final Parse parse) {
    final String nodeType = parse.getType();

    if (nodeType.equals(LABEL_TOP)) {
        final Parse firstChild = parse.getChildren()[0];
        return getPredicate(firstChild);
    }

    if (nodeType.equals(LABEL_SENTENCE)) {
        final Parse[] children = parse.getChildren();
        for (int i = 0; i < children.length; i++) {
            if (children[i].getType().equals(LABEL_VERBAL_PHRASE)) {
                return getPredicate(children[i]);
            }
        }
        return "";
    }

    return nodeType.equals(LABEL_VERBAL_PHRASE)
            ? getFirstOccurenceForType(parse, LABEL_VERB_PREFIX)
            : "";
}
示例4
/**
 * Extracts the object string from a parse tree.
 *
 * <p>A {@code LABEL_TOP} node delegates to its first child. A {@code LABEL_SENTENCE}
 * node tries each {@code LABEL_VERBAL_PHRASE} child in order and returns the first
 * non-empty result. A {@code LABEL_VERBAL_PHRASE} node is resolved through
 * {@code getFirstOccurenceForType(parse, LABEL_NAME_PREFIX)}.
 *
 * @param parse node to inspect
 * @return the extracted object, or the empty string when nothing was found
 */
public static String getObject(final Parse parse) {
    final String nodeType = parse.getType();

    if (nodeType.equals(LABEL_TOP)) {
        final Parse firstChild = parse.getChildren()[0];
        return getObject(firstChild);
    }

    if (nodeType.equals(LABEL_SENTENCE)) {
        for (Parse child : parse.getChildren()) {
            if (!child.getType().equals(LABEL_VERBAL_PHRASE)) {
                continue;
            }
            final String candidate = getObject(child);
            if (!candidate.isEmpty()) {
                return candidate;
            }
        }
        // Every verbal phrase (if any) produced an empty result.
        return "";
    }

    if (nodeType.equals(LABEL_VERBAL_PHRASE)) {
        return getFirstOccurenceForType(parse, LABEL_NAME_PREFIX);
    }
    return "";
}
示例5
/**
 * Extracts a constituent from a parse tree.
 *
 * <p>A {@code LABEL_TOP} node delegates to its first child. A {@code LABEL_SENTENCE}
 * node tries each child whose type equals {@code syntactic_cat}, in order, and returns
 * the first non-empty result. A node whose own type equals {@code syntactic_cat} is
 * resolved through {@code getFirstOccurenceForType(parse, lexical_cat)}.
 *
 * @param parse node to inspect
 * @param syntactic_cat node type that marks the phrase to search
 * @param lexical_cat value handed to {@code getFirstOccurenceForType}
 * @return the extracted constituent, or the empty string when nothing was found
 */
public static String getConstituent(final Parse parse, final String syntactic_cat,
        String lexical_cat) {
    final String nodeType = parse.getType();

    if (nodeType.equals(LABEL_TOP)) {
        final Parse firstChild = parse.getChildren()[0];
        return getConstituent(firstChild, syntactic_cat, lexical_cat);
    }

    if (nodeType.equals(LABEL_SENTENCE)) {
        for (Parse child : parse.getChildren()) {
            if (!child.getType().equals(syntactic_cat)) {
                continue;
            }
            final String candidate = getConstituent(child, syntactic_cat, lexical_cat);
            if (!candidate.isEmpty()) {
                return candidate;
            }
        }
        // Every matching child (if any) produced an empty result.
        return "";
    }

    if (nodeType.equals(syntactic_cat)) {
        return getFirstOccurenceForType(parse, lexical_cat);
    }
    return "";
}
示例6
/**
 * Stores the parses and builds the parse-to-entity-number map.
 *
 * <p>Each entity with more than one mention contributes one map entry per mention. The
 * key is the parse unwrapped from the mention's {@code DefaultParse}; the value is the
 * entity's 1-based position in {@code entities}.
 *
 * @param parses parses kept on this instance
 * @param entities entities whose mentions are indexed
 */
public CorefParse(List<Parse> parses, DiscourseEntity[] entities) {
    this.parses = parses;
    parseMap = new HashMap<Parse, Integer>();
    int entityNumber = 0;
    for (DiscourseEntity entity : entities) {
        entityNumber++;
        if (entity.getNumMentions() <= 1) {
            // Entities with a single mention are not recorded.
            continue;
        }
        Iterator<MentionContext> mentions = entity.getMentions();
        while (mentions.hasNext()) {
            MentionContext mention = mentions.next();
            DefaultParse wrapper = (DefaultParse) mention.getParse();
            parseMap.put(wrapper.getParse(), entityNumber);
        }
    }
}
示例7
/**
 * Prints a parse subtree to standard output, one node per line.
 *
 * <p>Each line is indented by two spaces per level and shows the node type followed by
 * the text covered by the node's span. When the node is a key of {@code parseMap}, the
 * mapped value is appended after a {@code #}. Nodes whose type starts with the two
 * characters of {@code Parser.TOK_NODE} are skipped together with their subtree.
 *
 * @param p node to print
 * @param deep nesting depth of {@code p}, used for indentation
 */
private void print(Parse p, int deep) {
    String type = p.getType();
    if (type.length() > 1 && type.substring(0, 2).equals(Parser.TOK_NODE)) {
        return;
    }
    char[] spaces = new char[deep * 2];
    Arrays.fill(spaces, ' ');
    Span span = p.getSpan();
    System.out.print(new String(spaces) + type + " -- "
            + p.getText().substring(span.getStart(), span.getEnd()));
    if (parseMap.containsKey(p)) {
        System.out.print("#" + parseMap.get(p));
    }
    System.out.print("\n");
    for (Parse child : p.getChildren()) {
        // Pass the primitive directly; boxing through the deprecated Integer
        // constructor only to unbox again was pointless.
        print(child, deep + 1);
    }
}
示例8
/**
 * Counts the nodes at which two parse trees agree, comparing them position by position.
 *
 * <p>A pair of nodes counts as one match when both labels are equal and both nodes cover
 * the same text. The children of the two nodes are then compared pairwise by index, up to
 * the shorter of the two child arrays, and their counts are added.
 *
 * <p>The previous implementation dereferenced both arguments before its null check and
 * always accessed child indices 0 and 1, so it threw as soon as the recursion reached a
 * node with fewer than two children.
 *
 * @param pa1 first tree, may be {@code null}
 * @param pa2 second tree, may be {@code null}
 * @return the number of matching node pairs; 0 when either argument is {@code null}
 */
public double matchChildren(Parse pa1, Parse pa2) {
    // Must come before any dereference of the arguments.
    if (pa1 == null || pa2 == null) {
        return 0;
    }

    double matchFound = 0;
    String p1NodeLabel = pa1.getLabel();
    String p2NodeLabel = pa2.getLabel();
    // A missing label never counts as a match and must not throw.
    if (p1NodeLabel != null && p1NodeLabel.equals(p2NodeLabel)
            && pa1.getCoveredText().equals(pa2.getCoveredText())) {
        matchFound = 1;
    }

    Parse[] children1 = pa1.getChildren();
    Parse[] children2 = pa2.getChildren();
    // Only positions present in both trees can be compared.
    int paired = Math.min(children1.length, children2.length);
    for (int i = 0; i < paired; i++) {
        matchFound += matchChildren(children1[i], children2[i]);
    }
    return matchFound;
}
示例9
/**
 * Scores how closely the parse structure of a passage matches that of a question.
 *
 * <p>When {@code passage} does not contain {@code ca} the score is 0 and no model is
 * loaded. Otherwise the parser model is read from {@code en-parser.bin}, the question and
 * the passage are each parsed, and the results of {@code matchChildren} over the aligned
 * parses are summed.
 *
 * <p>Fixes relative to the previous version: the passage parse is now built from
 * {@code passage} (it was built from {@code q}, so the question was compared with
 * itself); the model stream is closed; the POS-tagger and tokenizer models, which were
 * loaded but never used, are no longer read; and the loop is bounded by the shorter of
 * the two parse arrays.
 *
 * @param ca candidate answer that must occur in the passage
 * @param q question text
 * @param passage passage text
 * @param verbose unused; kept for interface compatibility
 * @return the accumulated structural match score
 * @throws InvalidFormatException if the parser model file is malformed
 * @throws IOException if the parser model file cannot be read
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException, IOException{
    if (!passage.contains(ca)) {
        return 0;
    }

    Parser parser;
    try (FileInputStream parserStream = new FileInputStream(new File("en-parser.bin"))) {
        parser = ParserFactory.create(new ParserModel(parserStream));
    }

    Parse[] questionParse = ParserTool.parseLine(q, parser, 1);
    Parse[] passageParse = ParserTool.parseLine(passage, parser, 1);

    double score = 0;
    int comparable = Math.min(questionParse.length, passageParse.length);
    for (int i = 0; i < comparable; i++) {
        score += matchChildren(questionParse[i], passageParse[i]);
    }
    return score;
}
示例10
/**
 * Counts the parses in {@code pa2} whose type also occurs among the parses in
 * {@code pa1}.
 *
 * <p>Diagnostic output, including the {@code show()} dump of each matching parse, is
 * written to standard output only when {@code verbose} is true. Previously
 * {@code show()} ran for every match regardless of the flag.
 *
 * @param pa1 parses whose types form the reference set
 * @param pa2 parses tested against the reference set
 * @param verbose whether to print diagnostics
 * @return the number of elements of {@code pa2} with a type present in {@code pa1}
 */
public double compareParseType(Parse[] pa1, Parse[] pa2, boolean verbose){
    // The map is used purely as a set of type names; the values are irrelevant.
    Map<String, String> typesInFirst = new HashMap<String, String>();
    for (Parse parse : pa1) {
        typesInFirst.put(parse.getType(), "y");
    }

    double numMatches = 0;
    for (Parse candidate : pa2) {
        String type = candidate.getType();
        if (!typesInFirst.containsKey(type)) {
            continue;
        }
        numMatches++;
        if (verbose) {
            System.out.println("\n");
            candidate.show();
            System.out.println("type: " + type);
        }
    }
    if (verbose) {
        System.out.println("numTypeMatches " + numMatches);
    }
    return numMatches;
}
示例11
/**
 * Manual smoke test: parses a fixed sentence concatenated with {@code this.q}, parses
 * {@code this.q} on its own, and dumps the results to standard output.
 *
 * <p>Output order: every parse of the combined text, every parse of the question, a
 * blank separator, then each child of the first child of the first combined parse (the
 * child array is captured before {@code expandTopNode} is called).
 *
 * @throws IOException declared for {@code init()}, which runs when the models are not yet
 *         initialized
 */
public void parserTest1() throws IOException {
    if (!this.modelsAreInitialized) {
        init();
    }
    final int beamSize = 20;
    final double advancePercentage = 0.95;
    Parser parser = ParserFactory.create(this.parserModel, beamSize, advancePercentage);

    String combinedText = "Jane Austen was very modest about her own genius ." + this.q;
    Parse[] results = ParserTool.parseLine(combinedText, parser, 1);
    Parse[] qResults = ParserTool.parseLine(this.q, parser, 1);

    Parse firstResult = results[0];
    Parse[] rChn = firstResult.getChildren()[0].getChildren();
    firstResult.expandTopNode(firstResult);

    for (Parse result : results) {
        result.show();
    }
    for (Parse qResult : qResults) {
        qResult.show();
    }
    System.out.print("\n\n");
    for (Parse child : rChn) {
        child.show();
        System.out.print("\n");
    }
}
示例12
/**
 * Splits a passage into sentences and returns the first parse of each one.
 *
 * <p>Each detected sentence is tokenized with {@code SimpleTokenizer}, re-joined with
 * single spaces, echoed to standard output, and parsed requesting one parse.
 *
 * @param p passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared by the original signature
 */
public static Parse[] parsePassageText(String p) throws InvalidFormatException{
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(sentenceModel);
    final int beamSize = 20;
    final double advancePercentage = 0.95;
    Parser parser = ParserFactory.create(parserModel, beamSize, advancePercentage);

    String[] sentences = sentenceDetector.sentDetect(p);
    Parse[] results = new Parse[sentences.length];
    int slot = 0;
    for (String sentence : sentences) {
        String[] tokens = SimpleTokenizer.INSTANCE.tokenize(sentence);
        String spaced = StringUtils.join(tokens, " ");
        System.out.println("Found sentence " + spaced);
        results[slot++] = ParserTool.parseLine(spaced, parser, 1)[0];
    }
    return results;
}
示例13
/**
 * Manual smoke test: parses a fixed sentence concatenated with {@code this.q}, parses
 * {@code this.q} on its own, and dumps the results to standard output.
 *
 * <p>Output order: every parse of the combined text, every parse of the question, a
 * blank separator, then each child of the first child of the first combined parse (the
 * child array is captured before {@code expandTopNode} is called).
 *
 * @throws IOException declared for {@code init()}, which runs when the models are not yet
 *         initialized
 */
public void parserTest1() throws IOException {
    if (!this.modelsAreInitialized) {
        init();
    }
    final int beamSize = 20;
    final double advancePercentage = 0.95;
    Parser parser = ParserFactory.create(this.parserModel, beamSize, advancePercentage);

    String combinedText = "Jane Austen was very modest about her own genius ." + this.q;
    Parse[] results = ParserTool.parseLine(combinedText, parser, 1);
    Parse[] qResults = ParserTool.parseLine(this.q, parser, 1);

    Parse firstResult = results[0];
    Parse[] rChn = firstResult.getChildren()[0].getChildren();
    firstResult.expandTopNode(firstResult);

    for (Parse result : results) {
        result.show();
    }
    for (Parse qResult : qResults) {
        qResult.show();
    }
    System.out.print("\n\n");
    for (Parse child : rChn) {
        child.show();
        System.out.print("\n");
    }
}
示例14
/**
 * Splits a passage into sentences and returns the first parse of each one.
 *
 * <p>Runs {@code init()} first when the models are not yet initialized. Each detected
 * sentence is tokenized with {@code SimpleTokenizer}, re-joined with single spaces,
 * echoed to standard output, and parsed requesting one parse.
 *
 * @param p passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared by the original signature
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
    if (!modelsAreInitialized) {
        init();
    }
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
    final int beamSize = 20;
    final double advancePercentage = 0.95;
    Parser parser = ParserFactory.create(this.parserModel, beamSize, advancePercentage);

    // Find sentences, tokenize each, parse each, keep the top parse for each.
    String[] sentences = sentenceDetector.sentDetect(p);
    Parse[] results = new Parse[sentences.length];
    int slot = 0;
    for (String sentence : sentences) {
        // The original author notes that, of the available tokenizers,
        // SimpleTokenizer worked best here.
        String[] tokens = SimpleTokenizer.INSTANCE.tokenize(sentence);
        String spaced = StringUtils.join(tokens, " ");
        System.out.println("Found sentence " + spaced);
        results[slot++] = ParserTool.parseLine(spaced, parser, 1)[0];
    }
    return results;
}
示例15
/**
 * THIS METHOD IS NOT USED.
 *
 * <p>Extracts named entities (NEs) from a parse tree that has been augmented with NE
 * tags. One growable list per entry of {@code finders} is filled by
 * {@code extractNesRec} and then copied into a fixed-size array.
 *
 * @param parse a parse tree augmented with NE tags
 * @return NEs per NE type; the outer array has {@code finders.length} entries
 */
// TODO only works with OpenNLP taggers so far
@SuppressWarnings("unchecked")
public static String[][] extractNes(Parse parse) {
    final int typeCount = finders.length;

    // Raw ArrayList[] is kept because that is what extractNesRec is handed.
    ArrayList[] nes = new ArrayList[typeCount];
    for (int type = 0; type < typeCount; type++) {
        nes[type] = new ArrayList();
    }

    // Walk the parse tree, filling the per-type lists.
    extractNesRec(parse, nes);

    String[][] results = new String[typeCount][];
    for (int type = 0; type < typeCount; type++) {
        ArrayList found = nes[type];
        results[type] = (String[]) found.toArray(new String[found.size()]);
    }
    return results;
}
示例16
/**
 * Demo: loads the chunking parser model from {@code getModelDir()}, parses a fixed
 * sentence requesting three parses, and prints each one with {@code show()}.
 *
 * <p>The model path is echoed to standard output first. An {@code IOException} while
 * loading the model is reported with {@code printStackTrace()} and otherwise ignored.
 */
private static void usingOpenNLP() {
    final String modelPath = getModelDir() + "/en-parser-chunking.bin";
    System.out.println(modelPath);

    try (InputStream modelStream = new FileInputStream(modelPath)) {
        Parser parser = ParserFactory.create(new ParserModel(modelStream));

        String sentence = "The cow jumped over the moon";
        // Replaced by a sentence that demonstrates the difference between NER and the parser.
        sentence = "He was the last person to see Fred.";

        Parse[] candidates = ParserTool.parseLine(sentence, parser, 3);
        for (int i = 0; i < candidates.length; i++) {
            // Alternative displays the original author experimented with:
            // candidates[i].showCodeTree(), or walking getChildren() / getTagNodes()
            // and printing each node's text, type, probability and label.
            candidates[i].show();
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
示例17
/**
 * Advances past a copula at position {@code i}, if there is one.
 *
 * <p>The index moves forward by one only when {@code i} is within {@code toks}, the
 * token's type starts with {@code "V"}, and the token's string form fully matches
 * {@code copulaPattern}.
 *
 * @param i current token index
 * @param toks tokens being scanned
 * @return {@code i + 1} when a copula was skipped, otherwise {@code i}
 */
private int movePastCopula(int i, Parse[] toks) {
    boolean verbAtIndex = i < toks.length && toks[i].getType().startsWith("V");
    if (verbAtIndex && copulaPattern.matcher(toks[i].toString()).matches()) {
        return i + 1;
    }
    return i;
}
示例18
/**
 * Returns the direct parent of a token when that parent is a noun phrase.
 *
 * <p>Only the immediate parent is examined; ancestors further up are not searched. A
 * token without a parent now yields {@code null} instead of throwing a
 * {@code NullPointerException}.
 *
 * @param token node whose parent is inspected
 * @return the parent when its type is {@code "NP"}, otherwise {@code null}
 */
private Parse getContainingNounPhrase(Parse token) {
    Parse parent = token.getParent();
    if (parent != null && "NP".equals(parent.getType())) {
        return parent;
    }
    return null;
}
示例19
/**
 * Finds the index in {@code toks} of the token that follows the last tag node of a
 * phrase.
 *
 * <p>The phrase's last tag node is located in {@code toks} by reference identity.
 *
 * @param p phrase whose last tag node is looked up
 * @param toks tokens to search
 * @return the index just after the phrase's last token, or {@code toks.length} when that
 *         token is not present in {@code toks}
 */
private int getTokenIndexFollowingPhrase(Parse p,Parse[] toks) {
    Parse[] phraseTokens = p.getTagNodes();
    Parse finalToken = phraseTokens[phraseTokens.length - 1];
    int index = 0;
    while (index < toks.length && toks[index] != finalToken) {
        index++;
    }
    return index < toks.length ? index + 1 : toks.length;
}
示例20
/**
 * Gathers the parent sense keys for every lemma of a noun phrase.
 *
 * <p>For each lemma returned by {@code getLemmas(np)}, the keys returned by
 * {@code wordnet.getParentSenseKeys(lemma, "NN", 0)} are added to the result.
 *
 * @param np noun phrase whose lemmas are looked up
 * @return the distinct keys collected across all lemmas
 */
private Set<String> getSynsetSet(Parse np) {
    Set<String> collected = new HashSet<String>();
    for (String lemma : getLemmas(np)) {
        for (String senseKey : wordnet.getParentSenseKeys(lemma, "NN", 0)) {
            collected.add(senseKey);
        }
    }
    return collected;
}
示例21
/**
 * Appends one {@code "s=<synset>"} feature per entry of {@code getSynsetSet(focusNoun)}.
 *
 * <p>Nothing is added when the type of the phrase's last tag node starts with
 * {@code "NNP"}.
 *
 * @param focusNoun phrase to derive features from
 * @param features list that receives the generated features
 */
private void generateWordNetFeatures(Parse focusNoun, List<String> features) {
    Parse[] tagNodes = focusNoun.getTagNodes();
    Parse lastNode = tagNodes[tagNodes.length - 1];
    if (!lastNode.getType().startsWith("NNP")) {
        for (String synset : getSynsetSet(focusNoun)) {
            features.add("s=" + synset);
        }
    }
}
示例22
/**
 * Appends word and tag features for every tag node of a phrase.
 *
 * <p>Every tag node except the last contributes {@code "mw=<node>"} and
 * {@code "mt=<type>"}; the last one contributes {@code "hw=<node>"} and
 * {@code "ht=<type>"}.
 *
 * @param focusNoun phrase to derive features from
 * @param features list that receives the generated features
 */
private void generateWordFeatures(Parse focusNoun, List<String> features) {
    Parse[] tagNodes = focusNoun.getTagNodes();
    // Index of the last node; clamped to 0 so an empty array is still accessed at
    // index 0, as the original loop counter would have done.
    int lastIndex = Math.max(tagNodes.length - 1, 0);
    for (int m = 0; m < lastIndex; m++) {
        Parse earlier = tagNodes[m];
        features.add("mw=" + earlier);
        features.add("mt=" + earlier.getType());
    }
    features.add("hw=" + tagNodes[lastIndex]);
    features.add("ht=" + tagNodes[lastIndex].getType());
}
示例23
/**
 * Demo entry point: parses a fixed question with a chunk-based parser and prints the
 * values of the {@code hw=} and {@code mw=} features that
 * {@code AnswerTypeContextGenerator} produces for it.
 *
 * <p>The WordNet directory is read from the {@code wordnet.dir} system property and the
 * model directory from {@code model.dir}. The model streams are now closed once each
 * model has been constructed, and an unused local was removed.
 *
 * @param args ignored
 * @throws IOException if a model file cannot be read
 */
public static void main(String args[]) throws IOException
{
    String wordnetDir = System.getProperty("wordnet.dir");
    String question = "Who is Abraham Lincoln?";
    AnswerTypeContextGenerator atcg = new AnswerTypeContextGenerator(new File(wordnetDir));

    String modelsDirProp = System.getProperty("model.dir");
    File modelsDir = new File(modelsDirProp);

    ChunkerModel chunkerModel;
    try (InputStream chunkerStream = new FileInputStream(
            new File(modelsDir, "en-chunker.bin"))) {
        chunkerModel = new ChunkerModel(chunkerStream);
    }
    ChunkerME chunker = new ChunkerME(chunkerModel);

    POSModel posModel;
    try (InputStream posStream = new FileInputStream(
            new File(modelsDir, "en-pos-maxent.bin"))) {
        posModel = new POSModel(posStream);
    }
    POSTaggerME tagger = new POSTaggerME(posModel);

    Parser parser = new ChunkParser(chunker, tagger);
    Parse query = ParserTool.parseLine(question, parser, 1)[0];
    String[] context = atcg.getContext(query);
    for (int i = 0; i < context.length; i++)
    {
        if (context[i].startsWith("hw=") || context[i].startsWith("mw="))
        {
            // Strip the three-character feature prefix before printing.
            System.out.println(context[i].substring(3));
        }
    }
}
示例24
/**
 * Returns the values of the first two {@code hw=} / {@code mw=} features generated for a
 * question.
 *
 * <p>The question is parsed with a chunk-based parser and handed to
 * {@code AnswerTypeContextGenerator}. The returned array always has length 2; slots for
 * which no feature was found stay {@code null}.
 *
 * <p>Fixes relative to the previous version: the {@code wordnet.dir} and
 * {@code model.dir} system properties are honoured, with the former hard-coded paths as
 * defaults (they used to be read and immediately overwritten); a third matching feature
 * no longer overflows the two-element result; the model streams are closed; and an
 * unused local was removed.
 *
 * @param question question text to analyse
 * @return a two-element array of feature values, possibly containing {@code null}
 * @throws IOException if a model file cannot be read
 */
public String[] getFocusNoun(String question) throws IOException
{
    String wordnetDir = System.getProperty("wordnet.dir", "WordNet-3.0/dict/");
    AnswerTypeContextGenerator atcg = new AnswerTypeContextGenerator(new File(wordnetDir));

    String modelsDirProp = System.getProperty("model.dir", "opennlp-models/");
    File modelsDir = new File(modelsDirProp);

    ChunkerModel chunkerModel;
    try (InputStream chunkerStream = new FileInputStream(
            new File(modelsDir, "en-chunker.bin"))) {
        chunkerModel = new ChunkerModel(chunkerStream);
    }
    ChunkerME chunker = new ChunkerME(chunkerModel);

    POSModel posModel;
    try (InputStream posStream = new FileInputStream(
            new File(modelsDir, "en-pos-maxent.bin"))) {
        posModel = new POSModel(posStream);
    }
    POSTaggerME tagger = new POSTaggerME(posModel);

    Parser parser = new ChunkParser(chunker, tagger);
    Parse query = ParserTool.parseLine(question, parser, 1)[0];
    String[] context = atcg.getContext(query);

    String[] focus = new String[2];
    int p = 0;
    // Stop as soon as the result is full so extra features cannot overflow it.
    for (int i = 0; i < context.length && p < focus.length; i++)
    {
        if (context[i].startsWith("hw=") || context[i].startsWith("mw="))
        {
            // Strip the three-character feature prefix.
            focus[p++] = context[i].substring(3);
        }
    }
    return focus;
}
示例25
/**
 * Builds an event from the current {@code line}.
 *
 * <p>The text before the first space is the outcome; the text after it is parsed
 * (requesting one parse) and turned into the event's context via
 * {@code atcg.getContext}.
 *
 * @return the event for the current line
 */
public Event next() {
    final int separator = line.indexOf(' ');
    final String label = line.substring(0, separator);
    final String text = line.substring(separator + 1);
    final Parse[] parsed = ParserTool.parseLine(text, parser, 1);
    final String[] context = atcg.getContext(parsed[0]);
    return new Event(label, context);
}
示例26
/**
 * Extracts a subject / predicate / object triplet from a sentence.
 *
 * <p>The sentence is parsed with {@code parseSentence}. When no parse is available a
 * message is printed to standard output and a default-constructed
 * {@code TripletRelation} is returned.
 *
 * @param sentence sentence text
 * @return the extracted relation, or a default-constructed one when parsing yields nothing
 */
public TripletRelation extractRelationFromSentence(String sentence){
    TripletRelation fallback = new TripletRelation();
    Parse parsed = parseSentence(sentence);
    if (parsed == null) {
        System.out.println("no valid parse from parseSentence");
        return fallback;
    }
    String subject = ParserExtractor.getSubject(parsed);
    String predicate = ParserExtractor.getPredicate(parsed);
    String object = ParserExtractor.getObject(parsed);
    return new TripletRelation(subject, predicate, object);
}
示例27
/**
 * Parses a sentence, requesting a single parse.
 *
 * @param sentence sentence text
 * @return the first parse, or {@code null} when the parser returned none
 */
public Parse parseSentence(String sentence){
    Parse[] candidates = ParserTool.parseLine(sentence, parser, 1);
    return candidates.length == 0 ? null : candidates[0];
}
示例28
/**
 * Prints a parse subtree to standard output, one node per line.
 *
 * <p>Each line is indented by two spaces per level and shows the node type followed by
 * the text covered by the node's span. Nodes whose type starts with the two characters
 * of {@code Parser.TOK_NODE} are skipped together with their subtree.
 *
 * @param p node to print
 * @param deep nesting depth of {@code p}, used for indentation
 */
public static void printParseTree(Parse p, int deep) {
    String type = p.getType();
    if (type.length() > 1 && type.substring(0, 2).equals(Parser.TOK_NODE)) {
        return;
    }
    char[] spaces = new char[deep * 2];
    Arrays.fill(spaces, ' ');
    Span span = p.getSpan();
    System.out.println(new String(spaces) + type + " -- "
            + p.getText().substring(span.getStart(), span.getEnd()));
    for (Parse child : p.getChildren()) {
        // Pass the primitive directly; boxing through the deprecated Integer
        // constructor only to unbox again was pointless.
        printParseTree(child, deep + 1);
    }
}
示例29
/**
 * Calls {@code show(Parse)} for every entry of {@code parses}, in order, writing a line
 * break to standard output after each one.
 */
public void show() {
    for (Parse current : parses) {
        show(current);
        System.out.println();
    }
}
示例30
/**
 * Calls {@code print(Parse, int)} at depth 0 for every entry of {@code parses}, in
 * order, writing a line break to standard output after each one.
 */
public void print() {
    for (Parse current : parses) {
        print(current, 0);
        System.out.println();
    }
}