Skip to content

Commit 83e9d4b

Browse files
committed
Flip the order of matching in ProcessSemgrexRequest so that each pattern is matched against all of the sentences at once. This allows for operations on the complete batch of matches, such as the new "uniq" operator.
1 parent 3671ced commit 83e9d4b

File tree

3 files changed

+31
-16
lines changed

3 files changed

+31
-16
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ProcessSemgrexRequest.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,9 @@ public class ProcessSemgrexRequest extends ProcessProtobufRequest {
3232
/**
3333
* Builds a single inner SemgrexResult structure from the pair of a SemgrexPattern and a SemanticGraph
3434
*/
35-
public static CoreNLPProtos.SemgrexResponse.SemgrexResult matchSentence(SemgrexPattern pattern, SemanticGraph graph, int patternIdx, int graphIdx) {
35+
public static CoreNLPProtos.SemgrexResponse.SemgrexResult matchSentence(SemgrexPattern pattern, SemanticGraph graph, List<SemgrexMatch> matches, int patternIdx, int graphIdx) {
3636
CoreNLPProtos.SemgrexResponse.SemgrexResult.Builder semgrexResultBuilder = CoreNLPProtos.SemgrexResponse.SemgrexResult.newBuilder();
37-
SemgrexMatcher matcher = pattern.matcher(graph);
38-
while (matcher.find()) {
37+
for (SemgrexMatch matcher : matches) {
3938
CoreNLPProtos.SemgrexResponse.Match.Builder matchBuilder = CoreNLPProtos.SemgrexResponse.Match.newBuilder();
4039
matchBuilder.setMatchIndex(matcher.getMatch().index());
4140
matchBuilder.setSemgrexIndex(patternIdx);
@@ -105,14 +104,27 @@ public static CoreNLPProtos.SemgrexResponse processRequest(CoreNLPProtos.Semgrex
105104
}
106105

107106
List<SemgrexPattern> patterns = request.getSemgrexList().stream().map(SemgrexPattern::compile).collect(Collectors.toList());
108-
int graphIdx = 0;
107+
List<Pair<CoreMap, List<Pair<SemgrexPattern, List<SemgrexMatch>>>>> allMatches = new ArrayList<>();
109108
for (CoreMap sentence : sentences) {
109+
allMatches.add(new Pair<>(sentence, new ArrayList<>()));
110+
}
111+
for (SemgrexPattern pattern : patterns) {
112+
List<Pair<CoreMap, List<SemgrexMatch>>> patternMatches = pattern.matchSentences(sentences, true);
113+
for (int i = 0; i < sentences.size(); ++i) {
114+
Pair<CoreMap, List<SemgrexMatch>> sentenceMatches = patternMatches.get(i);
115+
allMatches.get(i).second().add(new Pair<>(pattern, sentenceMatches.second()));
116+
}
117+
}
118+
119+
int graphIdx = 0;
120+
for (Pair<CoreMap, List<Pair<SemgrexPattern, List<SemgrexMatch>>>> sentenceMatches : allMatches) {
110121
CoreNLPProtos.SemgrexResponse.GraphResult.Builder graphResultBuilder = CoreNLPProtos.SemgrexResponse.GraphResult.newBuilder();
111122

112123
int patternIdx = 0;
113-
SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
114-
for (SemgrexPattern pattern : patterns) {
115-
graphResultBuilder.addResult(matchSentence(pattern, graph, patternIdx, graphIdx));
124+
SemanticGraph graph = sentenceMatches.first().get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
125+
for (Pair<SemgrexPattern, List<SemgrexMatch>> patternMatches : sentenceMatches.second()) {
126+
SemgrexPattern pattern = patternMatches.first();
127+
graphResultBuilder.addResult(matchSentence(pattern, graph, patternMatches.second(), patternIdx, graphIdx));
116128
++patternIdx;
117129
}
118130

src/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -351,13 +351,16 @@ public List<Pair<CoreMap, List<SemgrexMatch>>> postprocessMatches(List<Pair<Core
351351
*<br>
352352
* Non-matching sentences are currently not returned (may change in the future to return an empty list).
353353
*/
354-
public List<Pair<CoreMap, List<SemgrexMatch>>> matchSentences(List<CoreMap> sentences) {
354+
public List<Pair<CoreMap, List<SemgrexMatch>>> matchSentences(List<CoreMap> sentences, boolean keepEmptyMatches) {
355355
List<Pair<CoreMap, List<SemgrexMatch>>> matches = new ArrayList<>();
356356
for (CoreMap sentence : sentences) {
357357
SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
358358
SemanticGraph enhanced = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
359359
SemgrexMatcher matcher = matcher(graph);
360-
if ( ! matcher.find()) {
360+
if (!matcher.find()) {
361+
if (keepEmptyMatches) {
362+
matches.add(new Pair<>(sentence, new ArrayList<>()));
363+
}
361364
continue;
362365
}
363366
matches.add(new Pair<>(sentence, new ArrayList<>()));
@@ -602,7 +605,7 @@ public static void main(String[] args) throws IOException {
602605
}
603606
}
604607

605-
List<Pair<CoreMap, List<SemgrexMatch>>> matches = semgrex.matchSentences(sentences);
608+
List<Pair<CoreMap, List<SemgrexMatch>>> matches = semgrex.matchSentences(sentences, false);
606609

607610
for (Pair<CoreMap, List<SemgrexMatch>> sentenceMatches : matches) {
608611
CoreMap sentence = sentenceMatches.first();

test/src/edu/stanford/nlp/semgraph/semgrex/SemgrexTest.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1492,7 +1492,7 @@ public List<CoreMap> buildSmallBatch() {
14921492
public void testBatchSearch() {
14931493
List<CoreMap> sentences = buildSmallBatch();
14941494
SemgrexPattern semgrex = SemgrexPattern.compile("{word:foo}=x > {}=y");
1495-
List<Pair<CoreMap, List<SemgrexMatch>>> matches = semgrex.matchSentences(sentences);
1495+
List<Pair<CoreMap, List<SemgrexMatch>>> matches = semgrex.matchSentences(sentences, false);
14961496
String[] expectedMatches = {
14971497
BATCH_PARSES[0],
14981498
BATCH_PARSES[1],
@@ -1535,21 +1535,21 @@ public void testParsesUniq() {
15351535
public void testBatchUniq() {
15361536
List<CoreMap> sentences = buildSmallBatch();
15371537
SemgrexPattern semgrex = SemgrexPattern.compile("{word:foo}=x > {}=y :: uniq x");
1538-
List<Pair<CoreMap, List<SemgrexMatch>>> matches = semgrex.matchSentences(sentences);
1538+
List<Pair<CoreMap, List<SemgrexMatch>>> matches = semgrex.matchSentences(sentences, false);
15391539
// only the first foo sentence should match when using "uniq x"
15401540
assertEquals(1, matches.size());
15411541
assertEquals(BATCH_PARSES[0], matches.get(0).first().get(CoreAnnotations.TextAnnotation.class));
15421542
assertEquals(1, matches.get(0).second().size());
15431543

15441544
semgrex = SemgrexPattern.compile("{word:foo}=x > {}=y :: uniq");
1545-
matches = semgrex.matchSentences(sentences);
1545+
matches = semgrex.matchSentences(sentences, false);
15461546
// same thing happens when using "uniq" and no nodes - only one match will occur
15471547
assertEquals(1, matches.size());
15481548
assertEquals(BATCH_PARSES[0], matches.get(0).first().get(CoreAnnotations.TextAnnotation.class));
15491549
assertEquals(1, matches.get(0).second().size());
15501550

15511551
semgrex = SemgrexPattern.compile("{word:foo}=x > {}=y :: uniq y");
1552-
matches = semgrex.matchSentences(sentences);
1552+
matches = semgrex.matchSentences(sentences, false);
15531553
// now it should match both foo>bar and foo>baz
15541554
assertEquals(2, matches.size());
15551555
assertEquals(BATCH_PARSES[0], matches.get(0).first().get(CoreAnnotations.TextAnnotation.class));
@@ -1558,7 +1558,7 @@ public void testBatchUniq() {
15581558
assertEquals(1, matches.get(1).second().size());
15591559

15601560
semgrex = SemgrexPattern.compile("{}=x > {}=y :: uniq x y");
1561-
matches = semgrex.matchSentences(sentences);
1561+
matches = semgrex.matchSentences(sentences, false);
15621562
// now it should batch each of foo>bar, bar>baz, foo>baz
15631563
assertEquals(3, matches.size());
15641564
assertEquals(BATCH_PARSES[0], matches.get(0).first().get(CoreAnnotations.TextAnnotation.class));
@@ -1570,7 +1570,7 @@ public void testBatchUniq() {
15701570
}
15711571

15721572
public static void outputBatchResults(SemgrexPattern pattern, List<CoreMap> sentences) {
1573-
List<Pair<CoreMap, List<SemgrexMatch>>> matches = pattern.matchSentences(sentences);
1573+
List<Pair<CoreMap, List<SemgrexMatch>>> matches = pattern.matchSentences(sentences, false);
15741574
for (Pair<CoreMap, List<SemgrexMatch>> sentenceMatch : matches) {
15751575
System.out.println("Pattern matched at:");
15761576
System.out.println(sentenceMatch.first());

0 commit comments

Comments
 (0)