diff --git a/nb-configuration.xml b/nb-configuration.xml index 03b36cb..f9ed9ee 100644 --- a/nb-configuration.xml +++ b/nb-configuration.xml @@ -10,6 +10,7 @@ Without this configuration present, some functionality in the IDE may be limited + diff --git a/nbactions-run-diaspora-id_clone.xml b/nbactions-run-diaspora-id_clone.xml index 4a4ec10..d88c7ce 100644 --- a/nbactions-run-diaspora-id_clone.xml +++ b/nbactions-run-diaspora-id_clone.xml @@ -10,7 +10,7 @@ org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - -Xmx12500m -classpath %classpath com.namsor.tools.NamSorTools -apiKey ed051bf58dcda88ee57ddfe6feddde9d -r -uid -header -f fnlngeo -i D:\Projects\georgia\companiesHouse\persons-with-significant-control-snapshot-2019-05-14_idfnlngeo.txt -service gender + -Xmx12500m -classpath %classpath com.namsor.tools.NamSorTools -apiKey e77eee3fd16f82751912d27cdbacf7ec -w -header -uid -f fnln -i "D:\Sync\Dropbox\0_NamSor_SAS\NamSor_com\NamSor SAS\10_clients\HEC.CA\20200108_HEC_ca_idfnln.txt" -service country java @@ -24,7 +24,7 @@ org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - -Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -Xmx12500m -classpath %classpath com.namsor.tools.NamSorTools -apiKey ed051bf58dcda88ee57ddfe6feddde9d -r -uid -header -f fnlngeo -i D:\Projects\georgia\companiesHouse\persons-with-significant-control-snapshot-2019-05-14_idfnlngeo.txt -service gender + -Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -Xmx12500m -classpath %classpath com.namsor.tools.NamSorTools -apiKey e77eee3fd16f82751912d27cdbacf7ec -w -header -uid -f fnln -i "D:\Sync\Dropbox\0_NamSor_SAS\NamSor_com\NamSor SAS\10_clients\HEC.CA\20200108_HEC_ca_idfnln.txt" -service country true java @@ -39,7 +39,50 @@ org.codehaus.mojo:exec-maven-plugin:1.2.1:exec - -Xmx12500m -classpath %classpath com.namsor.tools.NamSorTools -apiKey ed051bf58dcda88ee57ddfe6feddde9d -r -uid -header -f fnlngeo -i D:\Projects\georgia\companiesHouse\persons-with-significant-control-snapshot-2019-05-14_idfnlngeo.txt -service gender + -Xmx12500m -classpath %classpath com.namsor.tools.NamSorTools -apiKey e77eee3fd16f82751912d27cdbacf7ec -w -header -uid -f fnln -i "D:\Sync\Dropbox\0_NamSor_SAS\NamSor_com\NamSor SAS\10_clients\HEC.CA\20200108_HEC_ca_idfnln.txt" -service country + java + + + + run + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2.1:exec + + + -Xmx12500m -classpath %classpath com.namsor.tools.NamSorTools -apiKey e77eee3fd16f82751912d27cdbacf7ec -w -header -uid -f fnln -i "D:\Sync\Dropbox\0_NamSor_SAS\NamSor_com\NamSor SAS\10_clients\HEC.CA\20200108_HEC_ca_idfnln.txt" -service country + java + + + + debug + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2.1:exec + + + -Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -Xmx12500m -classpath %classpath com.namsor.tools.NamSorTools -apiKey e77eee3fd16f82751912d27cdbacf7ec -w -header -uid -f fnln -i "D:\Sync\Dropbox\0_NamSor_SAS\NamSor_com\NamSor SAS\10_clients\HEC.CA\20200108_HEC_ca_idfnln.txt" -service country + true + java + + + + profile + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.2.1:exec + + + -Xmx12500m -classpath %classpath com.namsor.tools.NamSorTools -apiKey e77eee3fd16f82751912d27cdbacf7ec -w -header -uid -f fnln -i "D:\Sync\Dropbox\0_NamSor_SAS\NamSor_com\NamSor SAS\10_clients\HEC.CA\20200108_HEC_ca_idfnln.txt" -service country java diff --git a/pom.xml b/pom.xml index a4569e4..5f3c7f0 100644 --- a/pom.xml +++ b/pom.xml @@ -14,7 +14,7 @@ ${project.groupId} namsor-sdk2 - 2.0.7 + 2.0.8 diff --git a/src/main/java/com/namsor/tools/NamSorTools.java b/src/main/java/com/namsor/tools/NamSorTools.java index 5d7a5a5..04463b0 100644 --- a/src/main/java/com/namsor/tools/NamSorTools.java +++ b/src/main/java/com/namsor/tools/NamSorTools.java @@ -17,6 +17,7 @@ import com.namsor.sdk2.model.BatchFirstLastNameUSRaceEthnicityOut; import com.namsor.sdk2.model.BatchPersonalNameGenderedOut; import com.namsor.sdk2.model.BatchPersonalNameGeoIn; +import com.namsor.sdk2.model.BatchPersonalNameGeoOut; import com.namsor.sdk2.model.BatchPersonalNameIn; import com.namsor.sdk2.model.BatchPersonalNameParsedOut; import com.namsor.sdk2.model.FirstLastNameDiasporaedOut; @@ -27,6 +28,7 @@ import com.namsor.sdk2.model.FirstLastNameUSRaceEthnicityOut; import com.namsor.sdk2.model.PersonalNameGenderedOut; import com.namsor.sdk2.model.PersonalNameGeoIn; +import com.namsor.sdk2.model.PersonalNameGeoOut; import com.namsor.sdk2.model.PersonalNameIn; import com.namsor.sdk2.model.PersonalNameParsedOut; import java.io.BufferedReader; @@ -97,6 +99,7 @@ public class NamSorTools { private static final String SERVICE_NAME_PARSE = "parse"; private static final String SERVICE_NAME_GENDER = "gender"; private static final String SERVICE_NAME_ORIGIN = "origin"; + private static final String SERVICE_NAME_COUNTRY = "country"; private static final String SERVICE_NAME_DIASPORA = "diaspora"; private static final String SERVICE_NAME_USRACEETHNICITY = "usraceethnicity"; @@ -104,6 +107,7 @@ public class NamSorTools { SERVICE_NAME_PARSE, SERVICE_NAME_GENDER, SERVICE_NAME_ORIGIN, + SERVICE_NAME_COUNTRY, SERVICE_NAME_DIASPORA, SERVICE_NAME_USRACEETHNICITY }; @@ -111,12 +115,14 @@ public class NamSorTools { private static final String[] OUTPUT_DATA_PARSE_HEADER = {"firstNameParsed", "lastNameParsed", "nameParserType", "nameParserTypeAlt", "nameParserTypeScore", "script"}; private static final String[] OUTPUT_DATA_GENDER_HEADER = {"likelyGender", "likelyGenderScore", "probabilityCalibrated", "genderScale", "script"}; private static final String[] OUTPUT_DATA_ORIGIN_HEADER = {"countryOrigin", "countryOriginAlt", "probabilityCalibrated", "probabilityCalibratedAlt", "countryOriginScore", "script"}; + private static final String[] OUTPUT_DATA_COUNTRY_HEADER = {"country", "countryAlt", "probabilityCalibrated", "probabilityCalibratedAlt", "countryScore", "script"}; private static final String[] OUTPUT_DATA_DIASPORA_HEADER = {"ethnicity", "ethnicityAlt", "ethnicityScore", "script"}; private static final String[] OUTPUT_DATA_USRACEETHNICITY_HEADER = {"raceEthnicity", "raceEthnicityAlt", "probabilityCalibrated", "probabilityCalibratedAlt", "raceEthnicityScore", "script"}; private static final String[][] OUTPUT_DATA_HEADERS = { OUTPUT_DATA_PARSE_HEADER, OUTPUT_DATA_GENDER_HEADER, OUTPUT_DATA_ORIGIN_HEADER, + OUTPUT_DATA_COUNTRY_HEADER, OUTPUT_DATA_DIASPORA_HEADER, OUTPUT_DATA_USRACEETHNICITY_HEADER }; @@ -278,7 +284,7 @@ public static void main(String[] args) { Option service = Option.builder("service").argName("service") .hasArg(true) - .desc("service : parse / gender / origin / diaspora / usraceethnicity") + .desc("service : parse / gender / origin / country / diaspora / usraceethnicity") .longOpt("endpoint") .required(true) .build(); @@ -610,6 +616,28 @@ private Map processGenderFull(List processCountry(List names) throws ApiException, IOException { + Map result = new HashMap(); + BatchPersonalNameIn body = new BatchPersonalNameIn(); + body.setPersonalNames(names); + BatchPersonalNameGeoOut countried = api.countryBatch(body); + for (PersonalNameGeoOut personalName : countried.getPersonalNames()) { + result.put(personalName.getId(), personalName); + } + return result; + } + + private Map processCountryAdapted(List names_) throws ApiException, IOException { + List names = new ArrayList(); + for (FirstLastNameIn name : names_) { + PersonalNameIn adapted = new PersonalNameIn(); + adapted.setId(name.getId()); + adapted.setName(name.getFirstName()+" "+name.getLastName()); + names.add(adapted); + } + return processCountry(names); + } + private Map processGenderFullGeo(List names) throws ApiException, IOException { Map result = new HashMap(); BatchPersonalNameGeoIn body = new BatchPersonalNameGeoIn(); @@ -673,6 +701,9 @@ private void processData(String service, String[] outputHeaders, Writer writer, } else if (service.equals(SERVICE_NAME_GENDER)) { Map genders = processGender(new ArrayList(firstLastNamesIn.values())); append(writer, outputHeaders, firstLastNamesIn, genders, softwareNameAndVersion); + } else if (service.equals(SERVICE_NAME_COUNTRY)) { + Map countrieds = processCountryAdapted(new ArrayList(firstLastNamesIn.values())); + append(writer, outputHeaders, firstLastNamesIn, countrieds, softwareNameAndVersion); } firstLastNamesIn.clear(); } @@ -699,6 +730,9 @@ private void processData(String service, String[] outputHeaders, Writer writer, } else if (service.equals(SERVICE_NAME_GENDER)) { Map genders = processGenderFull(new ArrayList(personalNamesIn.values())); append(writer, outputHeaders, personalNamesIn, genders, softwareNameAndVersion); + } else if (service.equals(SERVICE_NAME_COUNTRY)) { + Map countrieds = processCountry(new ArrayList(personalNamesIn.values())); + append(writer, outputHeaders, personalNamesIn, countrieds, softwareNameAndVersion); } personalNamesIn.clear(); } @@ -761,6 +795,10 @@ private void append(Writer writer, String[] outputHeaders, Map input, Map output PersonalNameGenderedOut personalNameGenderedOut = (PersonalNameGenderedOut) outputObj; String scriptName = NamSorTools.computeScriptFirst(personalNameGenderedOut.getName()); writer.append(personalNameGenderedOut.getLikelyGender().getValue() + separatorOut + personalNameGenderedOut.getScore() + separatorOut + personalNameGenderedOut.getGenderScale() + separatorOut + scriptName + separatorOut); + } else if (outputObj instanceof PersonalNameGeoOut) { + PersonalNameGeoOut personalNameGeoOut = (PersonalNameGeoOut) outputObj; + String scriptName = NamSorTools.computeScriptFirst(personalNameGeoOut.getName()); + writer.append(personalNameGeoOut.getCountry() + separatorOut + personalNameGeoOut.getCountryAlt() + separatorOut + personalNameGeoOut.getProbabilityCalibrated() + separatorOut + personalNameGeoOut.getProbabilityAltCalibrated() + separatorOut +personalNameGeoOut.getScore() + separatorOut + scriptName + separatorOut); } else if (outputObj instanceof PersonalNameParsedOut) { PersonalNameParsedOut personalNameParsedOut = (PersonalNameParsedOut) outputObj; // {"firstNameParsed", "lastNameParsed", "nameParserType", "nameParserTypeAlt", "nameParserTypeScore"};