Morphological features need to be in a TreeMap to keep them sorted by key
stanfordnlp · Oct 26, 2023 · 4b161a0 · 4b161a0
1 parent 2a09f6f
commit 4b161a0
Show file tree

Hide file tree

Showing 6 changed files with 16 additions and 14 deletions.
diff --git a/src/edu/stanford/nlp/ling/CoreAnnotations.java b/src/edu/stanford/nlp/ling/CoreAnnotations.java
@@ -8,6 +8,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.SortedSet;
+import java.util.TreeMap;
 
 /**
  * Set of common annotations for {@link CoreMap}s. The classes
@@ -580,9 +581,9 @@ public Class<HashMap<String,String>> getType() {
   /**
    * CoNLL-U dep parsing - List of morphological features
    */
-  public static class CoNLLUFeats implements CoreAnnotation<HashMap<String,String>> {
+  public static class CoNLLUFeats implements CoreAnnotation<TreeMap<String,String>> {
     @Override
-    public Class<HashMap<String,String>> getType() {
+    public Class<TreeMap<String,String>> getType() {
       return ErasureUtils.uncheckedCast(HashMap.class);
     }
   }

diff --git a/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.java b/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.java
@@ -1453,7 +1453,7 @@ public CoreLabel fromProto(CoreNLPProtos.Token proto) {
     if (proto.hasSpan()) { word.set(SpanAnnotation.class, new IntPair(proto.getSpan().getBegin(), proto.getSpan().getEnd())); }
     if (proto.hasSentiment()) { word.set(SentimentCoreAnnotations.SentimentClass.class, proto.getSentiment()); }
     if (proto.hasQuotationIndex()) { word.set(QuotationIndexAnnotation.class, proto.getQuotationIndex()); }
-    if (proto.hasConllUFeatures()) { word.set(CoNLLUFeats.class, fromProto(proto.getConllUFeatures())); }
+    if (proto.hasConllUFeatures()) { word.set(CoNLLUFeats.class, new TreeMap<>(fromProto(proto.getConllUFeatures()))); }
     if (proto.hasConllUMisc()) { word.set(CoNLLUMisc.class, proto.getConllUMisc()); }
     if (proto.hasCoarseTag()) { word.set(CoarseTagAnnotation.class, proto.getCoarseTag()); }
     if (proto.hasConllUTokenSpan()) { word.set(CoNLLUTokenSpanAnnotation.class, new IntPair(proto.getConllUTokenSpan().getBegin(), proto.getSpan().getEnd())); }

diff --git a/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/EditNode.java b/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/EditNode.java
@@ -1,7 +1,6 @@
 package edu.stanford.nlp.semgraph.semgrex.ssurgeon;
 
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.Map;
 import java.util.TreeMap;
 
@@ -94,10 +93,10 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
     }
 
     for (String key : updateMorphoFeatures.keySet()) {
-      HashMap<String, String> features = word.get(CoreAnnotations.CoNLLUFeats.class);
+      TreeMap<String, String> features = word.get(CoreAnnotations.CoNLLUFeats.class);
       if (features == null) {
         changed = true;
-        features = new HashMap<>();
+        features = new TreeMap<>();
         word.set(CoreAnnotations.CoNLLUFeats.class, features);
       }
 

diff --git a/src/edu/stanford/nlp/trees/ud/CoNLLUDocumentReader.java b/src/edu/stanford/nlp/trees/ud/CoNLLUDocumentReader.java
@@ -245,7 +245,7 @@ public IndexedWord apply(String line) {
 
 
         /* Parse features. */
-        HashMap<String, String> features = CoNLLUUtils.parseFeatures(bits[5]);
+        TreeMap<String, String> features = CoNLLUUtils.parseFeatures(bits[5]);
         word.set(CoreAnnotations.CoNLLUFeats.class, features);
 
 
@@ -268,7 +268,7 @@ public IndexedWord apply(String line) {
         word.setValue(bits[1]);
 
         /* Parse features. */
-        HashMap<String, String> features = CoNLLUUtils.parseFeatures(bits[5]);
+        TreeMap<String, String> features = CoNLLUUtils.parseFeatures(bits[5]);
         word.set(CoreAnnotations.CoNLLUFeats.class, features);
 
         /* Parse extra dependencies. */

diff --git a/src/edu/stanford/nlp/trees/ud/CoNLLUUtils.java b/src/edu/stanford/nlp/trees/ud/CoNLLUUtils.java
@@ -17,8 +17,8 @@ public class CoNLLUUtils {
      * @param featureString
      * @return A {@code HashMap<String,String>} with the feature values.
      */
-    public static HashMap<String,String> parseFeatures(String featureString) {
-        HashMap<String, String> features = new LinkedHashMap<>();
+    public static TreeMap<String,String> parseFeatures(String featureString) {
+        TreeMap<String, String> features = new TreeMap<>();
         if (! featureString.equals("_")) {
             String[] featValPairs = featureString.split("\\|");
             for (String p : featValPairs) {

diff --git a/src/edu/stanford/nlp/trees/ud/UniversalDependenciesFeatureAnnotator.java b/src/edu/stanford/nlp/trees/ud/UniversalDependenciesFeatureAnnotator.java
@@ -7,8 +7,10 @@
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
+import java.util.TreeMap;
 
 import edu.stanford.nlp.io.IOUtils;
 import edu.stanford.nlp.io.RuntimeIOException;
@@ -51,8 +53,8 @@ public class UniversalDependenciesFeatureAnnotator  {
 
 
   private static final String FEATURE_MAP_FILE = "edu/stanford/nlp/models/ud/feature_map.txt";
-  private HashMap<String,HashMap<String,String>> posFeatureMap;
-  private HashMap<String,HashMap<String,String>> wordPosFeatureMap;
+  private HashMap<String,TreeMap<String,String>> posFeatureMap;
+  private Map<String,TreeMap<String,String>> wordPosFeatureMap;
 
   private final Morphology morphology = new Morphology();
 
@@ -390,10 +392,10 @@ public void addFeatures(SemanticGraph sg, Tree tree, boolean addLemma, boolean a
       String posTag = word.get(CoreAnnotations.PartOfSpeechAnnotation.class);
       String token = word.get(CoreAnnotations.TextAnnotation.class);
       Integer index = word.get(CoreAnnotations.IndexAnnotation.class);
-      HashMap<String, String> wordFeatures = word.get(CoreAnnotations.CoNLLUFeats.class);
+      TreeMap<String, String> wordFeatures = word.get(CoreAnnotations.CoNLLUFeats.class);
 
       if (wordFeatures == null) {
-        wordFeatures = new HashMap<>();
+        wordFeatures = new TreeMap<>();
         word.set(CoreAnnotations.CoNLLUFeats.class, wordFeatures);
       }