Skip to content

Commit

Permalink
Sort morphological features in lowercase alphabetical order. Better f…
Browse files Browse the repository at this point in the history
…its with what is expected in UD datasets
  • Loading branch information
AngledLuffa committed Oct 28, 2023
1 parent bb4d96c commit da3bb4d
Showing 1 changed file with 21 additions and 3 deletions.
24 changes: 21 additions & 3 deletions src/edu/stanford/nlp/trees/ud/CoNLLUFeatures.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,23 @@
* which is necessary for the CoNLLU format
*/
public class CoNLLUFeatures extends TreeMap<String, String> {
/**
 * Orders feature names alphabetically while ignoring case, with
 * {@code null} sorting before every non-null name.
 * <br>
 * Names which differ only in case are kept distinct: they are ordered
 * next to each other by a case-sensitive tie-break instead of comparing
 * as equal.  Without the tie-break, a map sorted with this comparator
 * would treat such names as the same key and silently drop one of them.
 * <br>
 * The comparator is stateless and {@code Serializable}, so a sorted map
 * which holds a reference to it can still be serialized.
 */
public static class LowercaseComparator implements Comparator<String>, java.io.Serializable {
  private static final long serialVersionUID = 1L;

  /**
   * @param x first feature name, may be {@code null}
   * @param y second feature name, may be {@code null}
   * @return a negative number, zero, or a positive number when {@code x}
   *         sorts before, is equal to, or sorts after {@code y}
   */
  @Override
  public int compare(String x, String y) {
    if (x == null && y == null) {
      return 0;
    }
    if (x == null) {
      return -1;
    }
    if (y == null) {
      return 1;
    }
    int ignoringCase = x.compareToIgnoreCase(y);
    if (ignoringCase != 0) {
      return ignoringCase;
    }
    // Same letters, possibly different case: only identical strings may
    // compare as equal, which keeps the ordering consistent with equals
    return x.compareTo(y);
  }
}

/** Single shared LowercaseComparator instance, passed to the TreeMap superclass constructor by this class's constructors. */
static final LowercaseComparator comparator = new LowercaseComparator();

/**
* Parses the value of the feature column in a CoNLL-U file
* and stores them in this TreeMap with the feature names as keys
Expand All @@ -24,7 +41,7 @@ public class CoNLLUFeatures extends TreeMap<String, String> {
* @return A {@code HashMap<String,String>} with the feature values.
*/
public CoNLLUFeatures(String featureString) {
super();
super(comparator);

if (!featureString.equals("_")) {
String[] featValPairs = featureString.split("\\|");
Expand All @@ -36,11 +53,12 @@ public CoNLLUFeatures(String featureString) {
}

/**
 * Creates a feature map holding the entries of an existing map.
 * <br>
 * The map is built empty on top of the shared {@code comparator} and
 * then filled with {@code putAll}, so the entries are ordered by that
 * comparator.
 * <br>
 * NOTE(review): this listing is a diff view without +/- markers;
 * {@code super(features)} appears to be the line the commit removed in
 * favor of the two statements after it -- confirm against the real file.
 *
 * @param features the feature name to feature value pairs to copy
 */
public CoNLLUFeatures(Map<String, String> features) {
super(features);
super(comparator);
putAll(features);
}

/**
 * Creates an empty feature map whose keys are ordered by the shared
 * {@code comparator}.
 * <br>
 * NOTE(review): this listing is a diff view without +/- markers; the
 * bare {@code super()} appears to be the line the commit replaced with
 * {@code super(comparator)} -- confirm against the real file.
 */
public CoNLLUFeatures() {
super();
super(comparator);
}


Expand Down

0 comments on commit da3bb4d

Please sign in to comment.