From 93abf166cbfffc72aad7cbbebb1d7e5a5a592964 Mon Sep 17 00:00:00 2001 From: John Bauer Date: Fri, 13 Oct 2023 12:54:20 -0700 Subject: [PATCH] Add .emptyIndex to Semgraph valueOf The node caching in valueOf now needs to be a 2DMap so that index & emptyIndex are both respected Add a test that valueOf is correctly creating the expected graph --- .../stanford/nlp/semgraph/SemanticGraph.java | 35 ++++++++++--- .../nlp/semgraph/SemanticGraphTest.java | 52 +++++++++++++++++++ 2 files changed, 79 insertions(+), 8 deletions(-) diff --git a/src/edu/stanford/nlp/semgraph/SemanticGraph.java b/src/edu/stanford/nlp/semgraph/SemanticGraph.java index 50c9ac73bc..51de48f171 100644 --- a/src/edu/stanford/nlp/semgraph/SemanticGraph.java +++ b/src/edu/stanford/nlp/semgraph/SemanticGraph.java @@ -20,6 +20,8 @@ import edu.stanford.nlp.util.Pair; import edu.stanford.nlp.util.StringParsingTask; import edu.stanford.nlp.util.StringUtils; +import edu.stanford.nlp.util.Triple; +import edu.stanford.nlp.util.TwoDimensionalMap; import edu.stanford.nlp.util.logging.Redwood; import static edu.stanford.nlp.trees.GrammaticalRelation.ROOT; @@ -1712,6 +1714,10 @@ public SemanticGraphEdge addEdge(SemanticGraphEdge edge) { *
* Indices are represented by a dash separated number after the word: * {@code [ate-1 subj>Bill-2 ...} + *
+ * An EmptyIndex for fake words such as in UD datasets is represented + * by a period separated number after the regular index + * {@code [ate-1 dobj>Bill-1.1 ...]} */ public static SemanticGraph valueOf(String s, Language language, Integer sentIndex) { return (new SemanticGraphParsingTask(s, language, sentIndex)).parse(); @@ -1861,7 +1867,9 @@ public SemanticGraph makeSoftCopy() { // ============================================================================ - private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("(.*)-([0-9]+)"); + // the chunk at the end captures an integer without the [.] + // if there is an emptyIndex attached to the node's index + private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("(.*)-([0-9]+)(?:(?:[.])([0-9]+))?"); /** * This nested class is a helper for valueOf(). It represents the task of @@ -1870,7 +1878,7 @@ public SemanticGraph makeSoftCopy() { private static class SemanticGraphParsingTask extends StringParsingTask { private SemanticGraph sg; - private Map indexesUsed = Generics.newHashMap(); + private TwoDimensionalMap indexesUsed = TwoDimensionalMap.hashMap(); private final Language language; private final Integer sentIndex; @@ -1935,17 +1943,22 @@ private void readDep(IndexedWord gov, String reln) { private IndexedWord makeVertex(String word) { Integer index; // initialized below - Pair wordAndIndex = readWordAndIndex(word); + Integer emptyIndex = 0; + Triple wordAndIndex = readWordAndIndex(word); if (wordAndIndex != null) { word = wordAndIndex.first(); index = wordAndIndex.second(); + emptyIndex = wordAndIndex.third(); } else { index = getNextFreeIndex(); } - if (indexesUsed.containsKey(index)) { - return indexesUsed.get(index); + if (indexesUsed.contains(index, emptyIndex)) { + return indexesUsed.get(index, emptyIndex); } IndexedWord ifl = new IndexedWord(null, sentIndex != null ? 
sentIndex : 0, index); + if (emptyIndex != 0) { + ifl.setEmptyIndex(emptyIndex); + } // log.info("SemanticGraphParsingTask>>> word = " + word); // log.info("SemanticGraphParsingTask>>> index = " + index); // log.info("SemanticGraphParsingTask>>> indexesUsed = " + indexesUsed); @@ -1954,18 +1967,24 @@ private IndexedWord makeVertex(String word) { ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]); if (wordAndTag.length > 1) ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]); - indexesUsed.put(index, ifl); + indexesUsed.put(index, emptyIndex, ifl); return ifl; } - private static Pair readWordAndIndex(String word) { + private static Triple readWordAndIndex(String word) { Matcher matcher = WORD_AND_INDEX_PATTERN.matcher(word); if (!matcher.matches()) { return null; } else { word = matcher.group(1); Integer index = Integer.valueOf(matcher.group(2)); - return new Pair<>(word, index); + Integer emptyIndex; + if (matcher.group(3) != null) { + emptyIndex = Integer.valueOf(matcher.group(3)); + } else { + emptyIndex = 0; + } + return new Triple<>(word, index, emptyIndex); } } diff --git a/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java b/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java index 6793b77207..ac38380f57 100644 --- a/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java +++ b/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java @@ -389,6 +389,58 @@ public void testValueOfIndices() { Assert.assertEquals(sg.getParentsWithReln(E, "dep").size(), 0); } + /** + * Test the vertices and edges of a very simple valueOf graph with both regular and empty indices (e.g. C-1.2) added + */ + @Test + public void testValueOfEmptyIndices() { + // test some with tags and some without + SemanticGraph sg = SemanticGraph.valueOf("[A/foo-2 obj> B/bar-1 obj> C-1.2 nsubj> [D-1.1 obj> E-0]]"); + + List words = sg.vertexListSorted(); + Assert.assertEquals(words.size(), 5); + IndexedWord A = words.get(4); + IndexedWord B = words.get(1); + IndexedWord C = words.get(3); + 
IndexedWord D = words.get(2); + IndexedWord E = words.get(0); + + Assert.assertEquals(A.word(), "A"); + Assert.assertEquals(A.tag(), "foo"); + Assert.assertEquals(B.word(), "B"); + Assert.assertEquals(B.tag(), "bar"); + Assert.assertEquals(C.word(), "C"); + Assert.assertEquals(D.word(), "D"); + Assert.assertEquals(E.word(), "E"); + + Assert.assertEquals(sg.getAllEdges(A, B).size(), 1); + Assert.assertEquals(sg.getParentsWithReln(B, "obj").size(), 1); + + Assert.assertEquals(sg.getAllEdges(A, C).size(), 1); + Assert.assertEquals(sg.getParentsWithReln(C, "obj").size(), 1); + + Assert.assertEquals(sg.getAllEdges(A, D).size(), 1); + Assert.assertEquals(sg.getParentsWithReln(D, "nsubj").size(), 1); + Assert.assertEquals(sg.getParentsWithReln(D, "obj").size(), 0); + Assert.assertEquals(sg.getParentsWithReln(D, "dep").size(), 0); + + Assert.assertEquals(sg.getAllEdges(A, E).size(), 0); + Assert.assertEquals(sg.getAllEdges(D, E).size(), 1); + Assert.assertEquals(sg.getParentsWithReln(E, "obj").size(), 1); + Assert.assertEquals(sg.getParentsWithReln(E, "dep").size(), 0); + + Assert.assertEquals(A.index(), 2); + Assert.assertEquals(A.getEmptyIndex(), 0); + Assert.assertEquals(B.index(), 1); + Assert.assertEquals(B.getEmptyIndex(), 0); + Assert.assertEquals(C.index(), 1); + Assert.assertEquals(C.getEmptyIndex(), 2); + Assert.assertEquals(D.index(), 1); + Assert.assertEquals(D.getEmptyIndex(), 1); + Assert.assertEquals(E.index(), 0); + Assert.assertEquals(E.getEmptyIndex(), 0); + } + /** * Test the vertices and edges if we reuse some indices in valueOf */