Java source code examples: opennlp.tools.parser.Parse
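Every snippet on this page operates on a Parse produced by the OpenNLP chunking parser. As a point of reference, here is a minimal setup sketch showing how such a Parse is typically obtained; it is not taken from any of the projects below, and the model file name "en-parser-chunking.bin" and its location on disk are assumptions.

// Minimal sketch: load a parser model, parse one whitespace-tokenized sentence,
// and print the resulting tree. The model path is an assumption.
import java.io.FileInputStream;
import java.io.InputStream;

import opennlp.tools.cmdline.parser.ParserTool;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.Parser;
import opennlp.tools.parser.ParserFactory;
import opennlp.tools.parser.ParserModel;

public class ParseSetupSketch {
	public static void main(String[] args) throws Exception {
		try (InputStream modelIn = new FileInputStream("en-parser-chunking.bin")) {
			ParserModel model = new ParserModel(modelIn);
			Parser parser = ParserFactory.create(model);
			// parseLine returns the requested number of top-ranked parses for the sentence
			Parse[] topParses = ParserTool.parseLine("The cow jumped over the moon .", parser, 1);
			topParses[0].show(); // prints the bracketed parse tree
		}
	}
}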

Example 1
private Parse[] getNounPhrases(Parse parse) {
  List<Parse> nps = new ArrayList<Parse>(10);
  List<Parse> parts = new ArrayList<Parse>();
  parts.add(parse);
  while (parts.size() > 0) {
    List<Parse> newParts = new ArrayList<Parse>();
    for (int pi=0,pn=parts.size();pi<pn;pi++) {
      Parse cp = parts.get(pi);
      if (cp.getType().equals("NP") && cp.isFlat()) {
        nps.add(cp);
      }
      else if (!cp.isPosTag()) {
        newParts.addAll(Arrays.asList(cp.getChildren()));
      }
    }
    parts = newParts;
  }
  return nps.toArray(new Parse[nps.size()]);
}
 
Example 2
public static String getSubject(final Parse parse) {
	if (parse.getType().equals(LABEL_TOP)) {
		return getSubject(parse.getChildren()[0]);
	}

	if (parse.getType().equals(LABEL_SENTENCE)) {
		for (Parse child : parse.getChildren()) {
			if (child.getType().equals(LABEL_NOUN_PHRASE)) {
				return getSubject(child);
			}
		}
	}
	if (parse.getType().equals(LABEL_NOUN_PHRASE)) {
		return getFirstOccurenceForType(parse, LABEL_NAME_PREFIX);
	}

	return "";
}
 
Example 3
public static String getPredicate(final Parse parse) {
	if (parse.getType().equals(LABEL_TOP)) {
		return getPredicate(parse.getChildren()[0]);
	}

	if (parse.getType().equals(LABEL_SENTENCE)) {
		for (Parse child : parse.getChildren()) {
			if (child.getType().equals(LABEL_VERBAL_PHRASE)) {
				return getPredicate(child);
			}
		}
		return "";
	}
	if (parse.getType().equals(LABEL_VERBAL_PHRASE)) {
		return getFirstOccurenceForType(parse, LABEL_VERB_PREFIX);
	}

	return "";
}
 
Example 4
public static String getObject(final Parse parse) {
	String object = "";
	if (parse.getType().equals(LABEL_TOP)) {
		return getObject(parse.getChildren()[0]);
	}

	if (parse.getType().equals(LABEL_SENTENCE)) {
		for (Parse child : parse.getChildren()) {
			if (child.getType().equals(LABEL_VERBAL_PHRASE)) {
				object = getObject(child); 
				if (!object.isEmpty()){
					return object;
				}
			}
		}
		return object;
	}
	if (parse.getType().equals(LABEL_VERBAL_PHRASE)) {
		return getFirstOccurenceForType(parse, LABEL_NAME_PREFIX);
	}

	return object;
}
 
Example 5
public static String getConstituent(final Parse parse, final String syntactic_cat,
		String lexical_cat) {
	String object = "";
	if (parse.getType().equals(LABEL_TOP)) {
		return getConstituent(parse.getChildren()[0], syntactic_cat, lexical_cat);
	}

	if (parse.getType().equals(LABEL_SENTENCE)) {
		for (Parse child : parse.getChildren()) {
			if (child.getType().equals(syntactic_cat)) {
				object = getConstituent(child, syntactic_cat, lexical_cat); 
				if (!object.isEmpty()){
					return object;
				}
			}
		}
		return object;
	}
	if (parse.getType().equals(syntactic_cat)) {
		return getFirstOccurenceForType(parse, lexical_cat);
	}

	return object;
}
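
The three accessors in Examples 2-4 are specializations of getConstituent (Example 4 exactly; Examples 2 and 3 up to how an empty result from the first matching child is handled). A hypothetical sketch, assuming the same label constants and enclosing class:

// Hypothetical rewrites (not from the original project): the accessors above
// expressed through getConstituent with the matching category constants.
public static String getSubjectViaConstituent(final Parse parse) {
	return getConstituent(parse, LABEL_NOUN_PHRASE, LABEL_NAME_PREFIX);
}

public static String getPredicateViaConstituent(final Parse parse) {
	return getConstituent(parse, LABEL_VERBAL_PHRASE, LABEL_VERB_PREFIX);
}

public static String getObjectViaConstituent(final Parse parse) {
	return getConstituent(parse, LABEL_VERBAL_PHRASE, LABEL_NAME_PREFIX);
}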
 
Example 6
public CorefParse(List<Parse> parses, DiscourseEntity[] entities) {
	this.parses = parses;
	parseMap = new HashMap<Parse, Integer>();
	for (int ei = 0, en = entities.length; ei < en; ei++) {
		if (entities[ei].getNumMentions() > 1) {
			for (Iterator<MentionContext> mi = entities[ei].getMentions(); mi
					.hasNext();) {
				MentionContext mc = mi.next();
				Parse mentionParse = ((DefaultParse) mc.getParse())
						.getParse();
				parseMap.put(mentionParse, ei + 1);
				// System.err.println("CorefParse: "+mc.getParse().hashCode()+" -> "+
				// (ei+1));
			}
		}
	}
}
 
Example 7
private void print(Parse p, int deep) {
	if (p.getType().length() > 1 && p.getType().substring(0, 2).equals(Parser.TOK_NODE))
		return;

	char[] spaces = new char[deep * 2];
	Arrays.fill(spaces, ' ');
	Span span = p.getSpan();
	System.out.print(new String(spaces) + p.getType() + " -- "
			+ p.getText().substring(span.getStart(), span.getEnd()));
	if (parseMap.containsKey(p)) {
		System.out.print("#" + parseMap.get(p));
	}
	System.out.print("\n");
	for (Parse child : p.getChildren()) {
		print(child, deep + 1);
	}
}
 
Example 8
public double matchChildren(Parse pa1, Parse pa2) {
	if (pa1 == null || pa2 == null) {
		return 0;
	}

	String p1NodeLabel = pa1.getLabel();
	String p2NodeLabel = pa2.getLabel();
	Parse[] children1 = pa1.getChildren();
	Parse[] children2 = pa2.getChildren();
	double matchFound = 0;

	if (p1NodeLabel != null && p1NodeLabel.equals(p2NodeLabel)
			&& pa1.getCoveredText().equals(pa2.getCoveredText())) {
		matchFound = 1;
	}

	// Recurse over the first two children, guarding against nodes with fewer children.
	double childScore = 0;
	for (int i = 0; i < Math.min(2, Math.min(children1.length, children2.length)); i++) {
		childScore += matchChildren(children1[i], children2[i]);
	}
	return matchFound + childScore;
}
 
Example 9
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException, IOException{
	POSTaggerME parserModel = new POSTaggerME(new POSModel(new FileInputStream(new File("en-pos-model.bin"))));
	Tokenizer tokenizer = new TokenizerME(new TokenizerModel(new FileInputStream(new File("en-token.bin"))));
	Parser parser = ParserFactory.create(new ParserModel(new FileInputStream(new File("en-parser.bin"))));
	double score = 0;
	
	Parse[] questionParse = ParserTool.parseLine(q, parser, 1);
	Parse[] passageParse = ParserTool.parseLine(passage, parser, 1);
	
	if (passage.contains(ca)) {
		for (int i =0; i < questionParse.length; i++) {
			score += matchChildren(questionParse[i],passageParse[i]);
		}
	}
	
	return score;
}
 
Example 10
public double compareParseType(Parse[] pa1, Parse[] pa2, boolean verbose) {
	double numMatches = 0;
	Map<String, String> key1 = new HashMap<String, String>();
	for (int i = 0; i < pa1.length; i++) {
		key1.put(pa1[i].getType(), "y");
	}
	for (int j = 0; j < pa2.length; j++) {
		String key2 = pa2[j].getType();
		if (key1.containsKey(key2)) {
			numMatches++;
			if (verbose) {
				System.out.println("\n");
				pa2[j].show();
				System.out.println("type: " + pa2[j].getType());
			}
		}
	}
	if (verbose) System.out.println("numTypeMatches " + numMatches);
	return numMatches;
}
 
Example 11
public void parserTest1() throws IOException {
	if (!this.modelsAreInitialized) init();
	Parser parser = ParserFactory.create(
			this.parserModel,
			20, // beam size
			0.95); 
	Parse[] results = ParserTool.parseLine("Jane Austen was very modest about her own genius ."+this.q,
			parser, 1);
	Parse[] qResults = ParserTool.parseLine(this.q,parser, 1);
	Parse[] rChn = (results[0].getChildren())[0].getChildren();
	
	results[0].expandTopNode(results[0]);
	for (int i = 0; i < results.length; i++) {
		results[i].show();
	}
	for (int i = 0; i < qResults.length; i++) {
		qResults[i].show();
	}
	System.out.print("\n\n");
	for (int i = 0; i < rChn.length; i++) {
		rChn[i].show();
		System.out.print("\n");
	}
}
 
Example 12
public static Parse[] parsePassageText(String p) throws InvalidFormatException{
	
	//initialize 	 
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(sentenceModel);
	Parser parser = ParserFactory.create(
			parserModel,
			20, // beam size
			0.95); // advance percentage
 	 	 
	String[] sentences = sentenceDetector.sentDetect(p);
	Parse[] results = new Parse[sentences.length];
	for (int i=0;i<sentences.length;i++){
		String[] tks = SimpleTokenizer.INSTANCE.tokenize(sentences[i]);
		String sent = StringUtils.join(tks, " ");
		System.out.println("Found sentence " + sent);
		Parse[] sentResults = ParserTool.parseLine(sent,parser, 1);
		results[i]=sentResults[0];
	}
	return results;
}
 
Example 13
public Parse[] parsePassageText(String p) throws InvalidFormatException{
	if (!modelsAreInitialized)init();
	//initialize 	 
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
	Parser parser = ParserFactory.create(
			this.parserModel,
			20, // beam size
			0.95); // advance percentage
	//find sentences, tokenize each, parse each, return top parse for each 	 	 
	String[] sentences = sentenceDetector.sentDetect(p);
	Parse[] results = new Parse[sentences.length];
	for (int i=0;i<sentences.length;i++){
		String[] tks = SimpleTokenizer.INSTANCE.tokenize(sentences[i]);
		//StringTokenizer st = new StringTokenizer(tks[i]); 
		//There are several tokenizers available. SimpleTokenizer works best

		String sent = StringUtils.join(tks, " ");
		System.out.println("Found sentence " + sent);
		Parse[] sentResults = ParserTool.parseLine(sent,parser, 1);
		results[i]=sentResults[0];
	}
	return results;
}
 
Example 14
/** THIS METHOD IS NOT USED 
 * Extracts NEs from a parse tree that has been augmented with NE tags.
 * 
 * @param parse a parse tree augmented with NE tags
 * @return NEs per NE type
 */
// TODO only works with OpenNLP taggers so far
@SuppressWarnings("unchecked")
public static String[][] extractNes(Parse parse) {
	// initialize dynamic arrays
	ArrayList[] nes = new ArrayList[finders.length];
	for (int i = 0; i < nes.length; i++) nes[i] = new ArrayList();
	
	// depth-first search on the parse tree
	extractNesRec(parse, nes);
	
	// copy to static arrays
	String[][] results = new String[finders.length][];
	for (int i = 0; i < nes.length; i++)
		results[i] = (String[]) nes[i].toArray(new String[nes[i].size()]);
	
	return results;
}
 
Example 15
private static void usingOpenNLP() {
        String fileLocation = getModelDir() + "/en-parser-chunking.bin";
        System.out.println(fileLocation);
        try (InputStream modelInputStream = new FileInputStream(fileLocation)) {
            ParserModel model = new ParserModel(modelInputStream);
            Parser parser = ParserFactory.create(model);
            String sentence = "The cow jumped over the moon";
            // Used to demonstrate difference between NER and Parser
            sentence = "He was the last person to see Fred.";

            Parse[] parses = ParserTool.parseLine(sentence, parser, 3);
            for (Parse parse : parses) {
                // First display
                parse.show();
                // Second display
//                parse.showCodeTree();
                // Third display
//                System.out.println("Children");
//                Parse children[] = parse.getChildren();
//                for (Parse parseElement : children) {
//                    System.out.println(parseElement);
//                    System.out.println(parseElement.getText());
//                    System.out.println(parseElement.getType());
//                    Parse tags[] = parseElement.getTagNodes();
//                    System.out.println("Tags");
//                    for (Parse tag : tags) {
//                        System.out.println("[" + tag + "]" + " type: " + tag.getType()
//                                + "  Probability: " + tag.getProb()
//                                + "  Label: " + tag.getLabel());
//                    }
//                }
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
 
Example 16
private int movePastCopula(int i, Parse[] toks) {
  if (i < toks.length && toks[i].getType().startsWith("V")) {
    if (copulaPattern.matcher(toks[i].toString()).matches()) {
      i++;
    }
  }
  return (i);
}
 
Example 17
private Parse getContainingNounPhrase(Parse token) {
  Parse parent = token.getParent();
  if (parent.getType().equals("NP")) {
    return parent;
  }
  return null;
}
 
Example 18
private int getTokenIndexFollowingPhrase(Parse p,Parse[] toks) {
  Parse[] ptok = p.getTagNodes();
  Parse lastToken = ptok[ptok.length-1];
  for (int ti=0,tl=toks.length;ti<tl;ti++) {
    if (toks[ti] == lastToken) {
      return(ti+1);
    }
  }
  return(toks.length);
}
 
Example 19
private Set<String> getSynsetSet(Parse np) {

    Set<String> synsetSet = new HashSet<String>();
    String[] lemmas = getLemmas(np);
    for (int li = 0; li < lemmas.length; li++) {
      String[] synsets = wordnet.getParentSenseKeys(lemmas[li],"NN",0);
      for (int si=0,sn=synsets.length;si<sn;si++) {
        synsetSet.add(synsets[si]);
      }
    }
    return (synsetSet);
  }
 
Example 20
private void generateWordNetFeatures(Parse focusNoun, List<String> features) {

    Parse[] toks = focusNoun.getTagNodes();
    if (toks[toks.length - 1].getType().startsWith("NNP")) {
      return;
    }
    //check wordnet
    Set<String> synsets = getSynsetSet(focusNoun);

    for (String synset : synsets) {
      features.add("s=" + synset);
    }
  }
 
Example 21
private void generateWordFeatures(Parse focusNoun, List<String> features) {
  Parse[] toks = focusNoun.getTagNodes();
  int nsi = 0;
  for (; nsi < toks.length - 1; nsi++) {
    features.add("mw=" + toks[nsi]);
    features.add("mt=" + toks[nsi].getType());
  }
  features.add("hw=" + toks[nsi]);
  features.add("ht=" + toks[nsi].getType());
}
 
Example 22
public static void main(String[] args) throws IOException
{
	String wordnetDir = System.getProperty("wordnet.dir");
	//wordnetDir="WordNet-3.0/dict/";
	String question="Who is Abraham Lincoln?";
	AnswerTypeContextGenerator atcg=new AnswerTypeContextGenerator(new File(wordnetDir));
	String q=null;
    String modelsDirProp = System.getProperty("model.dir");
   // modelsDirProp="opennlp-models/";
    File modelsDir = new File(modelsDirProp);
    InputStream chunkerStream = new FileInputStream(
        new File(modelsDir,"en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(
        new File(modelsDir,"en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger =  new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);
    
    Parse query = ParserTool.parseLine(question,parser,1)[0];
	String[] context=atcg.getContext(query);
	for(int i=0;i<context.length;i++)
	{
		if(context[i].startsWith("hw=") || context[i].startsWith("mw="))
		{
			System.out.println(context[i].substring(3));
		}
	}
}
 
Example 23
public String[] getFocusNoun(String question) throws IOException
{
	String wordnetDir = System.getProperty("wordnet.dir");
	wordnetDir="WordNet-3.0/dict/";
	AnswerTypeContextGenerator atcg=new AnswerTypeContextGenerator(new File(wordnetDir));
	String q=null;
    String modelsDirProp = System.getProperty("model.dir");
    modelsDirProp="opennlp-models/";
    File modelsDir = new File(modelsDirProp);
    InputStream chunkerStream = new FileInputStream(
        new File(modelsDir,"en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(
        new File(modelsDir,"en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger =  new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);
    
    Parse query = ParserTool.parseLine(question,parser,1)[0];
	String[] context=atcg.getContext(query);
	String[] focus=new String[2];
	int p=0;
	// stop once the focus array is full to avoid an ArrayIndexOutOfBoundsException
	for (int i = 0; i < context.length && p < focus.length; i++)
	{
		if (context[i].startsWith("hw=") || context[i].startsWith("mw="))
		{
			//System.out.println(context[i].substring(3));
			focus[p++] = context[i].substring(3);
		}
	}
	return focus;
}
 
Example 24
public Event next() {
    int split = line.indexOf(' ');
    String outcome = line.substring(0, split);
    String question = line.substring(split + 1);
    Parse query = ParserTool.parseLine(question, parser, 1)[0];
    return (new Event(outcome, atcg.getContext(query)));
}
 
Example 25
public TripletRelation extractRelationFromSentence(String sentence){
	TripletRelation rel = new TripletRelation();
	
	Parse p = parseSentence(sentence);
	if (p != null){
		rel = new TripletRelation(ParserExtractor.getSubject(p),
				ParserExtractor.getPredicate(p),
				ParserExtractor.getObject(p) );
	}
	else {
		System.out.println("no valid parse from parseSentence");
	}
	
	return rel;
}
 
Example 26
public Parse parseSentence(String sentence){
	Parse topParses[] = ParserTool.parseLine(sentence, parser, 1);
	if (topParses.length == 0)
		return null;
	else 
		return topParses[0];
}
 
Example 27
public static void printParseTree(Parse p, int deep) {
	if (p.getType().length() > 1 && p.getType().substring(0, 2).equals(Parser.TOK_NODE))
		return;

	char[] spaces = new char[deep * 2];
	Arrays.fill(spaces, ' ');
	Span span = p.getSpan();
	System.out.println(new String(spaces) + p.getType() + " -- "
			+ p.getText().substring(span.getStart(), span.getEnd()));
	for (Parse child : p.getChildren()) {
		printParseTree(child, deep + 1);
	}
}
 
Example 28
public void show() {
	for (int pi = 0, pn = parses.size(); pi < pn; pi++) {
		Parse p = parses.get(pi);
		show(p);
		System.out.println();
	}
}
 
Example 29
public void print() {
	for (int pi = 0, pn = parses.size(); pi < pn; pi++) {
		Parse p = parses.get(pi);
		print(p, 0);
		System.out.println();
	}
}