From a440609bd81234653ad6b4ccd94e67ac7ded57b7 Mon Sep 17 00:00:00 2001 From: NamSor Date: Mon, 15 Mar 2021 11:14:22 +0100 Subject: [PATCH 1/2] added name type classification - added name type classification - script name now sourced from API response --- pom.xml | 2 +- .../java/com/namsor/tools/NamSorTools.java | 121 ++++++++++++++---- 2 files changed, 97 insertions(+), 26 deletions(-) diff --git a/pom.xml b/pom.xml index 16964ac..c0ea9c9 100644 --- a/pom.xml +++ b/pom.xml @@ -14,7 +14,7 @@ ${project.groupId} namsor-sdk2 - 2.0.11 + 2.0.11B diff --git a/src/main/java/com/namsor/tools/NamSorTools.java b/src/main/java/com/namsor/tools/NamSorTools.java index 3cc0534..6a00154 100644 --- a/src/main/java/com/namsor/tools/NamSorTools.java +++ b/src/main/java/com/namsor/tools/NamSorTools.java @@ -6,6 +6,7 @@ package com.namsor.tools; import com.namsor.sdk2.api.AdminApi; +import com.namsor.sdk2.api.GeneralApi; import com.namsor.sdk2.api.PersonalApi; import com.namsor.sdk2.invoke.ApiClient; import com.namsor.sdk2.invoke.ApiException; @@ -15,22 +16,28 @@ import com.namsor.sdk2.model.BatchFirstLastNameIn; import com.namsor.sdk2.model.BatchFirstLastNameOriginedOut; import com.namsor.sdk2.model.BatchFirstLastNameUSRaceEthnicityOut; +import com.namsor.sdk2.model.BatchNameGeoIn; +import com.namsor.sdk2.model.BatchNameIn; import com.namsor.sdk2.model.BatchPersonalNameGenderedOut; import com.namsor.sdk2.model.BatchPersonalNameGeoIn; import com.namsor.sdk2.model.BatchPersonalNameGeoOut; import com.namsor.sdk2.model.BatchPersonalNameIn; import com.namsor.sdk2.model.BatchPersonalNameParsedOut; +import com.namsor.sdk2.model.BatchProperNounCategorizedOut; import com.namsor.sdk2.model.FirstLastNameDiasporaedOut; import com.namsor.sdk2.model.FirstLastNameGenderedOut; import com.namsor.sdk2.model.FirstLastNameGeoIn; import com.namsor.sdk2.model.FirstLastNameIn; import com.namsor.sdk2.model.FirstLastNameOriginedOut; import com.namsor.sdk2.model.FirstLastNameUSRaceEthnicityOut; +import 
com.namsor.sdk2.model.NameGeoIn; +import com.namsor.sdk2.model.NameIn; import com.namsor.sdk2.model.PersonalNameGenderedOut; import com.namsor.sdk2.model.PersonalNameGeoIn; import com.namsor.sdk2.model.PersonalNameGeoOut; import com.namsor.sdk2.model.PersonalNameIn; import com.namsor.sdk2.model.PersonalNameParsedOut; +import com.namsor.sdk2.model.ProperNounCategorizedOut; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -100,6 +107,7 @@ public class NamSorTools { private static final String SERVICE_NAME_GENDER = "gender"; private static final String SERVICE_NAME_ORIGIN = "origin"; private static final String SERVICE_NAME_COUNTRY = "country"; + private static final String SERVICE_NAME_TYPE = "nametype"; private static final String SERVICE_NAME_DIASPORA = "diaspora"; private static final String SERVICE_NAME_USRACEETHNICITY = "usraceethnicity"; @@ -108,14 +116,17 @@ public class NamSorTools { SERVICE_NAME_GENDER, SERVICE_NAME_ORIGIN, SERVICE_NAME_COUNTRY, + SERVICE_NAME_TYPE, SERVICE_NAME_DIASPORA, SERVICE_NAME_USRACEETHNICITY }; + private static final String[] OUTPUT_DATA_PARSE_HEADER = {"firstNameParsed", "lastNameParsed", "nameParserType", "nameParserTypeAlt", "nameParserTypeScore", "script"}; private static final String[] OUTPUT_DATA_GENDER_HEADER = {"likelyGender", "likelyGenderScore", "probabilityCalibrated", "genderScale", "script"}; - private static final String[] OUTPUT_DATA_ORIGIN_HEADER = {"countryOrigin", "countryOriginAlt", "probabilityCalibrated", "probabilityCalibratedAlt", "countryOriginScore", "script"}; - private static final String[] OUTPUT_DATA_COUNTRY_HEADER = {"country", "countryAlt", "probabilityCalibrated", "probabilityCalibratedAlt", "countryScore", "script"}; + private static final String[] OUTPUT_DATA_ORIGIN_HEADER = {"region","topRegion","subRegion","countryOrigin", "countryOriginAlt", "probabilityCalibrated", "probabilityCalibratedAlt", "countryOriginScore", "script"}; + private static final String[] 
OUTPUT_DATA_COUNTRY_HEADER = {"region","topRegion","subRegion","country", "countryAlt", "probabilityCalibrated", "probabilityCalibratedAlt", "countryScore", "script"}; + private static final String[] OUTPUT_DATA_TYPE_HEADER = {"commonType", "commonTypeAlt", "commonTypeScore", "script"}; private static final String[] OUTPUT_DATA_DIASPORA_HEADER = {"ethnicity", "ethnicityAlt", "ethnicityScore", "script"}; private static final String[] OUTPUT_DATA_USRACEETHNICITY_HEADER = {"raceEthnicity", "raceEthnicityAlt", "probabilityCalibrated", "probabilityCalibratedAlt", "raceEthnicityScore", "script"}; private static final String[][] OUTPUT_DATA_HEADERS = { @@ -123,12 +134,14 @@ public class NamSorTools { OUTPUT_DATA_GENDER_HEADER, OUTPUT_DATA_ORIGIN_HEADER, OUTPUT_DATA_COUNTRY_HEADER, + OUTPUT_DATA_TYPE_HEADER, OUTPUT_DATA_DIASPORA_HEADER, OUTPUT_DATA_USRACEETHNICITY_HEADER }; private final CommandLine commandLineOptions; - private final PersonalApi api; + private final PersonalApi personalApi; + private final GeneralApi generalApi; private final AdminApi adminApi; private final int TIMEOUT = 30000; private final boolean withUID; @@ -154,7 +167,8 @@ public NamSorTools(CommandLine commandLineOptions) { client.setBasePath(basePath); } //client.setDebugging(false); - api = new PersonalApi(client); + personalApi = new PersonalApi(client); + generalApi = new GeneralApi(client); adminApi = new AdminApi(client); withUID = commandLineOptions.hasOption("uid"); @@ -195,7 +209,8 @@ public String digest(String inClear) { } } - public static String computeScriptFirst(String someString) { + /** Deprecated, now use server side value + * public static String computeScriptFirst_(String someString) { for (int i = 0; i < someString.length(); i++) { Character c = someString.charAt(i); String script = Character.UnicodeScript.of(c).name(); @@ -205,7 +220,7 @@ public static String computeScriptFirst(String someString) { return script; } return null; - } + }*/ public static void main(String[] args) 
{ // create the parser @@ -593,7 +608,7 @@ private Map processDiaspora(List result = new HashMap(); BatchFirstLastNameGeoIn body = new BatchFirstLastNameGeoIn(); body.setPersonalNames(names); - BatchFirstLastNameDiasporaedOut origined = api.diasporaBatch(body); + BatchFirstLastNameDiasporaedOut origined = personalApi.diasporaBatch(body); for (FirstLastNameDiasporaedOut personalName : origined.getPersonalNames()) { result.put(personalName.getId(), personalName); } @@ -616,7 +631,7 @@ private Map processOrigin(List result = new HashMap(); BatchFirstLastNameIn body = new BatchFirstLastNameIn(); body.setPersonalNames(names); - BatchFirstLastNameOriginedOut origined = api.originBatch(body); + BatchFirstLastNameOriginedOut origined = personalApi.originBatch(body); for (FirstLastNameOriginedOut personalName : origined.getPersonalNames()) { result.put(personalName.getId(), personalName); } @@ -627,7 +642,7 @@ private Map processGender(List result = new HashMap(); BatchFirstLastNameIn body = new BatchFirstLastNameIn(); body.setPersonalNames(names); - BatchFirstLastNameGenderedOut gendered = api.genderBatch(body); + BatchFirstLastNameGenderedOut gendered = personalApi.genderBatch(body); for (FirstLastNameGenderedOut personalName : gendered.getPersonalNames()) { result.put(personalName.getId(), personalName); } @@ -638,7 +653,7 @@ private Map processGenderFull(List result = new HashMap(); BatchPersonalNameIn body = new BatchPersonalNameIn(); body.setPersonalNames(names); - BatchPersonalNameGenderedOut gendered = api.genderFullBatch(body); + BatchPersonalNameGenderedOut gendered = personalApi.genderFullBatch(body); for (PersonalNameGenderedOut personalName : gendered.getPersonalNames()) { result.put(personalName.getId(), personalName); } @@ -649,12 +664,50 @@ private Map processCountry(List name Map result = new HashMap(); BatchPersonalNameIn body = new BatchPersonalNameIn(); body.setPersonalNames(names); - BatchPersonalNameGeoOut countried = api.countryBatch(body); + 
BatchPersonalNameGeoOut countried = personalApi.countryBatch(body); for (PersonalNameGeoOut personalName : countried.getPersonalNames()) { result.put(personalName.getId(), personalName); } return result; } + + private Map processNameType(List names_) throws ApiException, IOException { + List names = new ArrayList(); + for (PersonalNameIn personalNameIn : names_) { + NameIn name = new NameIn(); + name.setId(personalNameIn.getId()); + name.setName(personalNameIn.getName()); + names.add(name); + } + Map result = new HashMap(); + BatchNameIn body = new BatchNameIn(); + body.setProperNouns(names); + BatchProperNounCategorizedOut nameTypedOut = generalApi.nameTypeBatch(body); + for (ProperNounCategorizedOut nameTyped : nameTypedOut.getProperNouns()) { + result.put(nameTyped.getId(), nameTyped); + } + return result; + } + + private Map processNameTypeGeo(List names_) throws ApiException, IOException { + List names = new ArrayList(); + for (PersonalNameGeoIn personalNameIn : names_) { + NameGeoIn name = new NameGeoIn(); + name.setId(personalNameIn.getId()); + name.setName(personalNameIn.getName()); + name.setCountryIso2(personalNameIn.getCountryIso2()); + names.add(name); + } + Map result = new HashMap(); + BatchNameGeoIn body = new BatchNameGeoIn(); + body.setProperNouns(names); + BatchProperNounCategorizedOut nameTypedOut = generalApi.nameTypeGeoBatch(body); + for (ProperNounCategorizedOut nameTyped : nameTypedOut.getProperNouns()) { + result.put(nameTyped.getId(), nameTyped); + } + return result; + } + private Map processCountryAdapted(List names_) throws ApiException, IOException { List names = new ArrayList(); @@ -671,7 +724,7 @@ private Map processGenderFullGeo(List result = new HashMap(); BatchPersonalNameGeoIn body = new BatchPersonalNameGeoIn(); body.setPersonalNames(names); - BatchPersonalNameGenderedOut gendered = api.genderFullGeoBatch(body); + BatchPersonalNameGenderedOut gendered = personalApi.genderFullGeoBatch(body); for (PersonalNameGenderedOut 
personalName : gendered.getPersonalNames()) { result.put(personalName.getId(), personalName); } @@ -682,7 +735,7 @@ private Map processParse(List nam Map result = new HashMap(); BatchPersonalNameIn body = new BatchPersonalNameIn(); body.setPersonalNames(names); - BatchPersonalNameParsedOut parsed = api.parseNameBatch(body); + BatchPersonalNameParsedOut parsed = personalApi.parseNameBatch(body); for (PersonalNameParsedOut personalName : parsed.getPersonalNames()) { result.put(personalName.getId(), personalName); } @@ -693,7 +746,7 @@ private Map processGenderGeo(List result = new HashMap(); BatchFirstLastNameGeoIn body = new BatchFirstLastNameGeoIn(); body.setPersonalNames(names); - BatchFirstLastNameGenderedOut gendered = api.genderGeoBatch(body); + BatchFirstLastNameGenderedOut gendered = personalApi.genderGeoBatch(body); for (FirstLastNameGenderedOut personalName : gendered.getPersonalNames()) { result.put(personalName.getId(), personalName); } @@ -704,7 +757,7 @@ private Map processParseGeo(List result = new HashMap(); BatchPersonalNameGeoIn body = new BatchPersonalNameGeoIn(); body.setPersonalNames(names); - BatchPersonalNameParsedOut parsed = api.parseNameGeoBatch(body); + BatchPersonalNameParsedOut parsed = personalApi.parseNameGeoBatch(body); for (PersonalNameParsedOut personalName : parsed.getPersonalNames()) { result.put(personalName.getId(), personalName); } @@ -715,7 +768,7 @@ private Map processUSRaceEthnicity(List Map result = new HashMap(); BatchFirstLastNameGeoIn body = new BatchFirstLastNameGeoIn(); body.setPersonalNames(names); - BatchFirstLastNameUSRaceEthnicityOut racedEthnicized = api.usRaceEthnicityBatch(body); + BatchFirstLastNameUSRaceEthnicityOut racedEthnicized = personalApi.usRaceEthnicityBatch(body); for (FirstLastNameUSRaceEthnicityOut personalName : racedEthnicized.getPersonalNames()) { result.put(personalName.getId(), personalName); } @@ -762,6 +815,9 @@ private void processData(String service, String[] outputHeaders, Writer writer, } 
else if (service.equals(SERVICE_NAME_COUNTRY)) { Map countrieds = processCountry(new ArrayList(personalNamesIn.values())); append(writer, outputHeaders, personalNamesIn, countrieds, softwareNameAndVersion); + } else if (service.equals(SERVICE_NAME_TYPE)) { + Map nameTypeds = processNameType(new ArrayList(personalNamesIn.values())); + append(writer, outputHeaders, personalNamesIn, nameTypeds, softwareNameAndVersion); } personalNamesIn.clear(); } @@ -772,6 +828,9 @@ private void processData(String service, String[] outputHeaders, Writer writer, } else if (service.equals(SERVICE_NAME_GENDER)) { Map genders = processGenderFullGeo(new ArrayList(personalNamesGeoIn.values())); append(writer, outputHeaders, personalNamesGeoIn, genders, softwareNameAndVersion); + } else if (service.equals(SERVICE_NAME_TYPE)) { + Map nameTypeds = processNameTypeGeo(new ArrayList(personalNamesGeoIn.values())); + append(writer, outputHeaders, personalNamesGeoIn, nameTypeds, softwareNameAndVersion); } personalNamesGeoIn.clear(); } @@ -797,6 +856,12 @@ private void append(Writer writer, String[] outputHeaders, Map input, Map output } else if (inputObj instanceof PersonalNameGeoIn) { PersonalNameGeoIn personalNameGeoIn = (PersonalNameGeoIn) inputObj; writer.append(digest(personalNameGeoIn.getName()) + separatorOut + personalNameGeoIn.getCountryIso2() + separatorOut); + } else if (inputObj instanceof NameIn) { + NameIn personalNameIn = (NameIn) inputObj; + writer.append(digest(personalNameIn.getName()) + separatorOut); + } else if (inputObj instanceof NameGeoIn) { + NameGeoIn personalNameGeoIn = (NameGeoIn) inputObj; + writer.append(digest(personalNameGeoIn.getName()) + separatorOut + personalNameGeoIn.getCountryIso2() + separatorOut); } else { throw new IllegalArgumentException("Serialization of " + inputObj.getClass().getName() + " not supported"); } @@ -806,34 +871,40 @@ private void append(Writer writer, String[] outputHeaders, Map input, Map output } } else if (outputObj instanceof 
FirstLastNameGenderedOut) { FirstLastNameGenderedOut firstLastNameGenderedOut = (FirstLastNameGenderedOut) outputObj; - String scriptName = NamSorTools.computeScriptFirst(firstLastNameGenderedOut.getLastName()); + String scriptName = firstLastNameGenderedOut.getScript(); //NamSorTools.computeScriptFirst(firstLastNameGenderedOut.getLastName()); writer.append(firstLastNameGenderedOut.getLikelyGender().getValue() + separatorOut + firstLastNameGenderedOut.getScore() + separatorOut + firstLastNameGenderedOut.getProbabilityCalibrated() + separatorOut + firstLastNameGenderedOut.getGenderScale() + separatorOut + scriptName + separatorOut); } else if (outputObj instanceof FirstLastNameOriginedOut) { FirstLastNameOriginedOut firstLastNameOriginedOut = (FirstLastNameOriginedOut) outputObj; - String scriptName = NamSorTools.computeScriptFirst(firstLastNameOriginedOut.getLastName()); - writer.append(firstLastNameOriginedOut.getCountryOrigin() + separatorOut + firstLastNameOriginedOut.getCountryOriginAlt() + separatorOut + firstLastNameOriginedOut.getProbabilityCalibrated() + separatorOut + firstLastNameOriginedOut.getProbabilityAltCalibrated() + separatorOut + firstLastNameOriginedOut.getScore() + separatorOut + scriptName + separatorOut); + String scriptName = firstLastNameOriginedOut.getScript();//NamSorTools.computeScriptFirst(firstLastNameOriginedOut.getLastName()); + //"region","topRegion","subRegion" + writer.append(firstLastNameOriginedOut.getRegionOrigin() + separatorOut + firstLastNameOriginedOut.getTopRegionOrigin() + separatorOut + firstLastNameOriginedOut.getSubRegionOrigin() + separatorOut + firstLastNameOriginedOut.getCountryOrigin() + separatorOut + firstLastNameOriginedOut.getCountryOriginAlt() + separatorOut + firstLastNameOriginedOut.getProbabilityCalibrated() + separatorOut + firstLastNameOriginedOut.getProbabilityAltCalibrated() + separatorOut + firstLastNameOriginedOut.getScore() + separatorOut + scriptName + separatorOut); + } else if (outputObj instanceof 
ProperNounCategorizedOut) { + ProperNounCategorizedOut properNounCategorizedOut = (ProperNounCategorizedOut) outputObj; + String scriptName = properNounCategorizedOut.getScript();//NamSorTools.computeScriptFirst(properNounCategorizedOut.getName()); + writer.append(properNounCategorizedOut.getCommonType() + separatorOut + properNounCategorizedOut.getCommonTypeAlt() + separatorOut + properNounCategorizedOut.getScore() + separatorOut + scriptName + separatorOut); } else if (outputObj instanceof FirstLastNameDiasporaedOut) { FirstLastNameDiasporaedOut firstLastNameDiasporaedOut = (FirstLastNameDiasporaedOut) outputObj; - String scriptName = NamSorTools.computeScriptFirst(firstLastNameDiasporaedOut.getLastName()); + String scriptName = firstLastNameDiasporaedOut.getScript();//NamSorTools.computeScriptFirst(firstLastNameDiasporaedOut.getLastName()); writer.append(firstLastNameDiasporaedOut.getEthnicity() + separatorOut + firstLastNameDiasporaedOut.getEthnicityAlt() + separatorOut + firstLastNameDiasporaedOut.getScore() + separatorOut + scriptName + separatorOut); } else if (outputObj instanceof FirstLastNameUSRaceEthnicityOut) { FirstLastNameUSRaceEthnicityOut firstLastNameUSRaceEthnicityOut = (FirstLastNameUSRaceEthnicityOut) outputObj; - String scriptName = NamSorTools.computeScriptFirst(firstLastNameUSRaceEthnicityOut.getLastName()); + String scriptName = firstLastNameUSRaceEthnicityOut.getScript();//NamSorTools.computeScriptFirst(firstLastNameUSRaceEthnicityOut.getLastName()); writer.append(firstLastNameUSRaceEthnicityOut.getRaceEthnicity() + separatorOut + firstLastNameUSRaceEthnicityOut.getRaceEthnicityAlt() + separatorOut + firstLastNameUSRaceEthnicityOut.getProbabilityCalibrated() + separatorOut + firstLastNameUSRaceEthnicityOut.getProbabilityAltCalibrated() + separatorOut + firstLastNameUSRaceEthnicityOut.getScore() + separatorOut + scriptName + separatorOut); } else if (outputObj instanceof PersonalNameGenderedOut) { PersonalNameGenderedOut 
personalNameGenderedOut = (PersonalNameGenderedOut) outputObj; - String scriptName = NamSorTools.computeScriptFirst(personalNameGenderedOut.getName()); + String scriptName = personalNameGenderedOut.getScript();//NamSorTools.computeScriptFirst(personalNameGenderedOut.getName()); writer.append(personalNameGenderedOut.getLikelyGender().getValue() + separatorOut + personalNameGenderedOut.getScore() + separatorOut + personalNameGenderedOut.getGenderScale() + separatorOut + scriptName + separatorOut); } else if (outputObj instanceof PersonalNameGeoOut) { PersonalNameGeoOut personalNameGeoOut = (PersonalNameGeoOut) outputObj; - String scriptName = NamSorTools.computeScriptFirst(personalNameGeoOut.getName()); - writer.append(personalNameGeoOut.getCountry() + separatorOut + personalNameGeoOut.getCountryAlt() + separatorOut + personalNameGeoOut.getProbabilityCalibrated() + separatorOut + personalNameGeoOut.getProbabilityAltCalibrated() + separatorOut +personalNameGeoOut.getScore() + separatorOut + scriptName + separatorOut); + String scriptName = personalNameGeoOut.getScript();//NamSorTools.computeScriptFirst(personalNameGeoOut.getName()); + //"region","topRegion","subRegion" + writer.append(personalNameGeoOut.getRegion() + separatorOut + personalNameGeoOut.getTopRegion() + separatorOut + personalNameGeoOut.getSubRegion() + separatorOut + personalNameGeoOut.getCountry() + separatorOut + personalNameGeoOut.getCountryAlt() + separatorOut + personalNameGeoOut.getProbabilityCalibrated() + separatorOut + personalNameGeoOut.getProbabilityAltCalibrated() + separatorOut +personalNameGeoOut.getScore() + separatorOut + scriptName + separatorOut); } else if (outputObj instanceof PersonalNameParsedOut) { PersonalNameParsedOut personalNameParsedOut = (PersonalNameParsedOut) outputObj; // {"firstNameParsed", "lastNameParsed", "nameParserType", "nameParserTypeAlt", "nameParserTypeScore"}; String firstNameParsed = (personalNameParsedOut.getFirstLastName() != null ? 
personalNameParsedOut.getFirstLastName().getFirstName() : ""); String lastNameParsed = (personalNameParsedOut.getFirstLastName() != null ? personalNameParsedOut.getFirstLastName().getLastName() : ""); - String scriptName = NamSorTools.computeScriptFirst(personalNameParsedOut.getName()); + String scriptName = personalNameParsedOut.getScript();//NamSorTools.computeScriptFirst(personalNameParsedOut.getName()); writer.append(firstNameParsed + separatorOut + lastNameParsed + separatorOut + personalNameParsedOut.getNameParserType() + separatorOut + personalNameParsedOut.getNameParserTypeAlt() + separatorOut + personalNameParsedOut.getScore() + separatorOut + scriptName + separatorOut); } else { throw new IllegalArgumentException("Serialization of " + outputObj.getClass().getName() + " not supported"); From 7d260b380cb644f5e2c0fbd2bcc13adbf3f06497 Mon Sep 17 00:00:00 2001 From: NamSor Date: Mon, 15 Mar 2021 14:05:25 +0100 Subject: [PATCH 2/2] upgraded pom, added nameType and apiphone upgraded pom, added nameType and apiphone --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c0ea9c9..a535745 100644 --- a/pom.xml +++ b/pom.xml @@ -14,7 +14,7 @@ ${project.groupId} namsor-sdk2 - 2.0.11B + 2.0.13