Skip to content

Commit

Permalink
Add .emptyIndex to Semgraph valueOf
Browse files Browse the repository at this point in the history
The node cache in valueOf now needs to be a TwoDimensionalMap keyed on
both index and emptyIndex, so that nodes which share an index but have
different emptyIndex values are kept distinct

Add a test that valueOf correctly creates the expected graph
  • Loading branch information
AngledLuffa committed Oct 14, 2023
1 parent 94d783c commit 93abf16
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 8 deletions.
35 changes: 27 additions & 8 deletions src/edu/stanford/nlp/semgraph/SemanticGraph.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringParsingTask;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.TwoDimensionalMap;
import edu.stanford.nlp.util.logging.Redwood;

import static edu.stanford.nlp.trees.GrammaticalRelation.ROOT;
Expand Down Expand Up @@ -1712,6 +1714,10 @@ public SemanticGraphEdge addEdge(SemanticGraphEdge edge) {
* <br>
* Indices are represented by a dash-separated number after the word:
* {@code [ate-1 subj>Bill-2 ...]}
* <br>
* An EmptyIndex, used for fake words such as those in UD datasets, is
* represented by a period-separated number after the regular index:
* {@code [ate-1 dobj>Bill-1.1 ...]}
*/
public static SemanticGraph valueOf(String s, Language language, Integer sentIndex) {
return (new SemanticGraphParsingTask(s, language, sentIndex)).parse();
Expand Down Expand Up @@ -1861,7 +1867,9 @@ public SemanticGraph makeSoftCopy() {

// ============================================================================

private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("(.*)-([0-9]+)");
// the optional group at the end captures the emptyIndex as an integer,
// without the leading [.], when one is attached to the node's index
private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("(.*)-([0-9]+)(?:(?:[.])([0-9]+))?");

/**
* This nested class is a helper for valueOf(). It represents the task of
Expand All @@ -1870,7 +1878,7 @@ public SemanticGraph makeSoftCopy() {
private static class SemanticGraphParsingTask extends StringParsingTask<SemanticGraph> {

private SemanticGraph sg;
private Map<Integer, IndexedWord> indexesUsed = Generics.newHashMap();
private TwoDimensionalMap<Integer, Integer, IndexedWord> indexesUsed = TwoDimensionalMap.hashMap();
private final Language language;
private final Integer sentIndex;

Expand Down Expand Up @@ -1935,17 +1943,22 @@ private void readDep(IndexedWord gov, String reln) {

private IndexedWord makeVertex(String word) {
Integer index; // initialized below
Pair<String, Integer> wordAndIndex = readWordAndIndex(word);
Integer emptyIndex = 0;
Triple<String, Integer, Integer> wordAndIndex = readWordAndIndex(word);
if (wordAndIndex != null) {
word = wordAndIndex.first();
index = wordAndIndex.second();
emptyIndex = wordAndIndex.third();
} else {
index = getNextFreeIndex();
}
if (indexesUsed.containsKey(index)) {
return indexesUsed.get(index);
if (indexesUsed.contains(index, emptyIndex)) {
return indexesUsed.get(index, emptyIndex);
}
IndexedWord ifl = new IndexedWord(null, sentIndex != null ? sentIndex : 0, index);
if (emptyIndex != 0) {
ifl.setEmptyIndex(emptyIndex);
}
// log.info("SemanticGraphParsingTask>>> word = " + word);
// log.info("SemanticGraphParsingTask>>> index = " + index);
// log.info("SemanticGraphParsingTask>>> indexesUsed = " + indexesUsed);
Expand All @@ -1954,18 +1967,24 @@ private IndexedWord makeVertex(String word) {
ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]);
if (wordAndTag.length > 1)
ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]);
indexesUsed.put(index, ifl);
indexesUsed.put(index, emptyIndex, ifl);
return ifl;
}

private static Pair<String, Integer> readWordAndIndex(String word) {
private static Triple<String, Integer, Integer> readWordAndIndex(String word) {
Matcher matcher = WORD_AND_INDEX_PATTERN.matcher(word);
if (!matcher.matches()) {
return null;
} else {
word = matcher.group(1);
Integer index = Integer.valueOf(matcher.group(2));
return new Pair<>(word, index);
Integer emptyIndex;
if (matcher.group(3) != null) {
emptyIndex = Integer.valueOf(matcher.group(3));
} else {
emptyIndex = 0;
}
return new Triple<>(word, index, emptyIndex);
}
}

Expand Down
52 changes: 52 additions & 0 deletions test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,58 @@ public void testValueOfIndices() {
Assert.assertEquals(sg.getParentsWithReln(E, "dep").size(), 0);
}

/**
 * Builds a small graph with valueOf in which several nodes share the
 * regular index 1 but differ in their empty index ({@code B-1},
 * {@code D-1.1}, {@code C-1.2}), then checks the words, tags, edges,
 * indices and empty indices of the resulting vertices.
 */
@Test
public void testValueOfEmptyIndices() {
  // A and B carry tags, the remaining nodes do not
  SemanticGraph graph = SemanticGraph.valueOf("[A/foo-2 obj> B/bar-1 obj> C-1.2 nsubj> [D-1.1 obj> E-0]]");

  List<IndexedWord> sorted = graph.vertexListSorted();
  Assert.assertEquals(sorted.size(), 5);

  // expected sorted order: E-0, B-1, D-1.1, C-1.2, A-2
  IndexedWord nodeE = sorted.get(0);
  IndexedWord nodeB = sorted.get(1);
  IndexedWord nodeD = sorted.get(2);
  IndexedWord nodeC = sorted.get(3);
  IndexedWord nodeA = sorted.get(4);

  // words and tags
  Assert.assertEquals(nodeA.word(), "A");
  Assert.assertEquals(nodeA.tag(), "foo");
  Assert.assertEquals(nodeB.word(), "B");
  Assert.assertEquals(nodeB.tag(), "bar");
  Assert.assertEquals(nodeC.word(), "C");
  Assert.assertEquals(nodeD.word(), "D");
  Assert.assertEquals(nodeE.word(), "E");

  // A -obj-> B
  Assert.assertEquals(graph.getAllEdges(nodeA, nodeB).size(), 1);
  Assert.assertEquals(graph.getParentsWithReln(nodeB, "obj").size(), 1);

  // A -obj-> C
  Assert.assertEquals(graph.getAllEdges(nodeA, nodeC).size(), 1);
  Assert.assertEquals(graph.getParentsWithReln(nodeC, "obj").size(), 1);

  // A -nsubj-> D, and D has no other kind of parent
  Assert.assertEquals(graph.getAllEdges(nodeA, nodeD).size(), 1);
  Assert.assertEquals(graph.getParentsWithReln(nodeD, "nsubj").size(), 1);
  Assert.assertEquals(graph.getParentsWithReln(nodeD, "obj").size(), 0);
  Assert.assertEquals(graph.getParentsWithReln(nodeD, "dep").size(), 0);

  // E hangs off D, not A
  Assert.assertEquals(graph.getAllEdges(nodeA, nodeE).size(), 0);
  Assert.assertEquals(graph.getAllEdges(nodeD, nodeE).size(), 1);
  Assert.assertEquals(graph.getParentsWithReln(nodeE, "obj").size(), 1);
  Assert.assertEquals(graph.getParentsWithReln(nodeE, "dep").size(), 0);

  // index / emptyIndex pairs; 0 is the expected emptyIndex when none was given
  Assert.assertEquals(nodeA.index(), 2);
  Assert.assertEquals(nodeA.getEmptyIndex(), 0);
  Assert.assertEquals(nodeB.index(), 1);
  Assert.assertEquals(nodeB.getEmptyIndex(), 0);
  Assert.assertEquals(nodeC.index(), 1);
  Assert.assertEquals(nodeC.getEmptyIndex(), 2);
  Assert.assertEquals(nodeD.index(), 1);
  Assert.assertEquals(nodeD.getEmptyIndex(), 1);
  Assert.assertEquals(nodeE.index(), 0);
  Assert.assertEquals(nodeE.getEmptyIndex(), 0);
}

/**
* Test the vertices and edges if we reuse some indices in valueOf
*/
Expand Down

0 comments on commit 93abf16

Please sign in to comment.