Skip to content

Commit

Permalink
issue #18 and #20 fixed along with code cleanup.
Browse files Browse the repository at this point in the history
  • Loading branch information
aftiqb committed Feb 23, 2012
1 parent 877f6d5 commit 7981d97
Show file tree
Hide file tree
Showing 43 changed files with 74 additions and 572 deletions.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified institutions/Eurostat/parser/bin/org/deri/eurostat/Main.class
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified institutions/Eurostat/parser/build/org/deri/eurostat/Main.class
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified institutions/Eurostat/parser/dist/EuroStat.jar
Binary file not shown.
212 changes: 24 additions & 188 deletions institutions/Eurostat/parser/src/com/ontologycentral/estatwrap/Data.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.Reader;
import java.text.DecimalFormat;
import java.util.List;
import java.util.logging.Logger;

import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
Expand All @@ -22,14 +20,10 @@ public class Data {
static BufferedWriter write = null;
static FileWriter fstream = null;
int timePosition = 0;
// here, use a threshold to limit the amount of data converted (GAE limitations)
public static int MAX_COLS = 8;
public static int MAX_ROWS = 1024;
public static String type = "";

/**
 * Creates a converter that reads its input through a buffered wrapper
 * around the supplied reader.
 *
 * @param sr source of the data to convert; it is wrapped in a
 *           {@link BufferedReader} and stored in {@code _in}
 *           (the {@code _in} field is declared outside this view)
 * @throws IOException declared by the signature; nothing in this body throws it
 * @throws XMLStreamException declared by the signature; nothing in this body throws it
 */
public Data(Reader sr) throws IOException, XMLStreamException {
_in = new BufferedReader(sr);

}

public void getObservationType(Reader reader) throws IOException, XMLStreamException
Expand Down Expand Up @@ -81,20 +75,6 @@ public void convert(XMLStreamWriter out, String id, String freq, String datasetI
h = new Header(line);
}

// if ((line = _inputStream.readLine()) != null) {
// ++rows;
// line = line.trim();
// if (line.length() <= 0) {
// throw new IOException("could not read header!");
// }
//
// h = new Header(line);
// }



//System.out.println("Type is :" + type);

if(type.equals("non-numeric value"))
{
createLogFile(logPath);
Expand All @@ -108,15 +88,13 @@ public void convert(XMLStreamWriter out, String id, String freq, String datasetI
}
}

List hd1 = h.getDim1();
List<String> hd1 = h.getDim1();
for (int j = 0; j < hd1.size(); ++j) {
//System.out.println("hd1 --> " + hd1.get(j));
if(hd1.get(j).equals("time"))
if(hd1.get(j).equals("time"))
timePosition = j;
}

while ((line = _in.readLine()) != null) {
//System.out.println("in print truple");
++rows;
line = line.trim();
if (line.length() <= 0) {
Expand All @@ -126,171 +104,62 @@ public void convert(XMLStreamWriter out, String id, String freq, String datasetI
l = new Line(line);

printTriple(h, l, out, rows, id, freq);

// this code restricts from converting more data.
// if (rows > MAX_ROWS) {
// break;
// }
}


_in.close();
}

/*
public void printTriple(Header h, Line l, XMLStreamWriter out, int bnodeid, String id) throws XMLStreamException {
List hd1 = h.getDim1();
List ld1 = l.getDim1();
if (hd1.size() != ld1.size()) {
System.err.println("header dimensions and line dimensions don't match!");
}
List hcol = h.getCols();
List lcol = l.getCols();
if (hcol.size() != lcol.size()) {
System.err.println("header columns and line columns don't match!");
}
int start = 0;
int end = Math.min(hcol.size(), MAX_COLS);
// hack - some stats are sorted from oldest to newest, some the other way round
// check if the last entry contains year 200x or 201x
String last = (String)hcol.get(hcol.size()-1);
//System.out.println(last);
if (last.contains("200") || last.contains("201")) {
start = hcol.size()-MAX_COLS;
if (start < 0) {
start = 0;
}
end = hcol.size();
}
for (int i = start; i < end; ++i)
{
if (((String)lcol.get(i)).equals(":")) {
continue;
}
out.writeStartElement("qb:Observation");
out.writeStartElement("qb:dataset");
out.writeAttribute("rdf:resource", baseURI + "/id/" + id + "#ds");
// @@@ workaround to get query processor to function
//out.writeAttribute("rdf:resource", id + "#ds");
out.writeEndElement();
for (int j = 0; j < hd1.size(); ++j) {
out.writeStartElement((String)hd1.get(j));
//--//out.writeAttribute("rdf:resource", Dictionary.PREFIX + (String)hd1.get(j) + "#" + (String)ld1.get(j));
out.writeAttribute("rdf:resource", baseURI + "/dic/" + (String)hd1.get(j) + "#" + (String)ld1.get(j));
out.writeEndElement();
}
out.writeStartElement((String)h.getDim2());
//--//out.writeAttribute("rdf:resource", Dictionary.PREFIX + (String)h.getDim2() + "#" + (String)hcol.get(i));
out.writeAttribute("rdf:resource", baseURI + "/dic/" + (String)h.getDim2() + "#" + (String)hcol.get(i));
out.writeEndElement();
//http://purl.org/linked-data/sdmx/2009/measure#obsValue
out.writeStartElement("sdmx-measure:obsValue");
String val = (String)lcol.get(i);
String note = null;
if (val.indexOf(' ') > 0) {
note = val.substring(val.indexOf(' ')+1);
val = val.substring(0, val.indexOf(' '));
//-//out.writeAttribute("rdf:datatype", Dictionary.PREFIX + "note#" + note);
out.writeAttribute("rdf:datatype", baseURI + "/dic/" + "note#" + note);
}
out.writeCharacters(val);
out.writeEndElement();
out.writeEndElement();
}
}
*/

/**
 * Feeds every observation cell of one data line to {@link #returnType(String)},
 * which updates the shared {@code type} field.
 *
 * <p>Cells containing {@code ':'} are skipped and never reach
 * {@code returnType}. A mismatch between header and line sizes is only
 * reported on {@code System.err}; scanning continues over the cells that
 * are present in both.
 *
 * @param h parsed header row, providing the dimension and column labels
 * @param l parsed data row whose cells are inspected
 */
public void getType(Header h, Line l)
{
    List<String> hd1 = h.getDim1();
    List<String> ld1 = l.getDim1();

    if (hd1.size() != ld1.size()) {
        System.err.println("header dimensions and line dimensions don't match!");
    }

    List<String> hcol = h.getCols();
    List<String> lcol = l.getCols();

    if (hcol.size() != lcol.size()) {
        System.err.println("header columns and line columns don't match!");
    }

    // Stop at the shorter of the two lists: a line with fewer cells than the
    // header was only warned about above and must not fail with an
    // IndexOutOfBoundsException here.
    int end = Math.min(hcol.size(), lcol.size());

    for (int i = 0; i < end; ++i) {
        String val = lcol.get(i);

        // contains(":") already covers the bare ":" cell, so one check suffices.
        if (val.contains(":")) {
            continue;
        }

        returnType(val);
    }
}

public void printTriple(Header h, Line l, XMLStreamWriter out, int bnodeid, String id, String freq) throws XMLStreamException {
List hd1 = h.getDim1();
List ld1 = l.getDim1();
List<String> hd1 = h.getDim1();
List<String> ld1 = l.getDim1();
DecimalFormat df = new DecimalFormat ("0.00");

String obs_URI = "";
if (hd1.size() != ld1.size()) {
System.err.println("header dimensions and line dimensions don't match!");
}

List hcol = h.getCols();
List lcol = l.getCols();
List<String> hcol = h.getCols();
List<String> lcol = l.getCols();

if (hcol.size() != lcol.size()) {
System.err.println("header columns and line columns don't match!");
}

int start = 0;

// displays only 8 columns data per dataset. But we need to dump all the data.
//int end = Math.min(hcol.size(), MAX_COLS);
// rdfize all columns data
int end = hcol.size();

// hack - some stats are sorted from oldest to newest, some the other way round
// check if the last entry contains year 200x or 201x
String last = (String)hcol.get(hcol.size()-1);
//System.out.println(last);

// This piece of code restricts the number of records to display only the last 8 columns if
// last entry contains year 200x or 201x. We dont need it in our case as we are dumping all data.
// if (last.contains("200") || last.contains("201")) {
// start = hcol.size()-MAX_COLS;
// if (start < 0) {
// start = 0;
// }
// end = hcol.size();
// }


for (int i = start; i < end; ++i)
{
if (((String)lcol.get(i)).equals(":") || ((String)lcol.get(i)).contains(":")) {
Expand All @@ -312,12 +181,9 @@ public void printTriple(Header h, Line l, XMLStreamWriter out, int bnodeid, Stri

out.writeStartElement("qb:dataSet");
out.writeAttribute("rdf:resource", "/data/" + id);
// @@@ workaround to get query processor to function
//out.writeAttribute("rdf:resource", id + "#ds");

out.writeEndElement();

// new code for adding FREQ
// add sdmx-dimension:freq
if(!freq.equals(""))
{
out.writeStartElement("sdmx-dimension:freq");
Expand All @@ -326,7 +192,6 @@ public void printTriple(Header h, Line l, XMLStreamWriter out, int bnodeid, Stri
}

for (int j = 0; j < hd1.size(); ++j) {
//System.out.println("hd1 --> " + hd1.get(j));
if(!hd1.get(j).equals("time"))
{
out.writeStartElement("property:" + (String)hd1.get(j));
Expand All @@ -351,8 +216,6 @@ public void printTriple(Header h, Line l, XMLStreamWriter out, int bnodeid, Stri
}
}

//System.out.println(hcol.get(i));
// new code
if(h.getDim2().equalsIgnoreCase("time"))
{
String timeperiod = time.convertTimeSereis((String)hcol.get(i));
Expand All @@ -375,34 +238,19 @@ public void printTriple(Header h, Line l, XMLStreamWriter out, int bnodeid, Stri
out.writeAttribute("rdf:resource", Dictionary.PREFIX + (String)h.getDim2() + "#" + (String)hcol.get(i));
out.writeEndElement();
}
// old code
// out.writeStartElement("property:" + (String)h.getDim2());
// out.writeAttribute("rdf:resource", Dictionary.PREFIX + (String)h.getDim2() + "#" + (String)hcol.get(i));
// out.writeEndElement();

//http://purl.org/linked-data/sdmx/2009/measure#obsValue

// exclude entries like ': c' which exists in the dataset
if(!lcol.get(i).toString().contains(":"))
{
out.writeStartElement("sdmx-measure:obsValue");
String val = (String)lcol.get(i);

//System.out.println(val);
//String datatype = "";
//if(type.equals("decimal"))
// datatype = "";

String status = null;
if (val.indexOf(' ') > 0 ) {
status = val.substring(val.indexOf(' ')+1);
val = val.substring(0, val.indexOf(' '));
//out.writeAttribute("rdf:resource", "/dic/obs_status#" + status);
//out.writeAttribute("rdf:datatype", Dictionary.PREFIX + "obs_status#" + status);
}

// new code

// certain observation values are represented by '-', we consider them to be 0.
if(val.equals("-"))
{
Expand All @@ -419,8 +267,6 @@ else if(type.equals("integer"))
out.writeCharacters(df.format(Double.valueOf(val).doubleValue()));
else
out.writeCharacters(val);


}
else if(type.equals("integer"))
{
Expand All @@ -430,7 +276,6 @@ else if(type.equals("integer"))
else
out.writeCharacters(val);


out.writeEndElement();

if(status != null)
Expand All @@ -440,14 +285,8 @@ else if(type.equals("integer"))
out.writeEndElement();
}
out.writeEndElement();

}

// old code
// out.writeCharacters(val);
// out.writeEndElement();
//
// out.writeEndElement();
}
}

Expand All @@ -465,11 +304,10 @@ public void returnType(String str)

if(!type.equals("non-numeric value"))
type = "decimal";
//System.out.println(str + " is a valid decimal number");

}
catch(NumberFormatException nme)
{
//System.out.println(str + " is not a valid decimal number");
type = "non-numeric value";
}

Expand All @@ -484,17 +322,15 @@ public void returnType(String str)
Integer.parseInt(str);
if(!type.equals("decimal") & !type.equals("non-numeric value"))
type = "integer";
//System.out.println(str + " is valid integer number");

}
catch(NumberFormatException nme)
{
//System.out.println(str + " is not a valid integer number");
type = "non-numeric value";
}
}
}

//return type;

}

public void createLogFile(String filePath)
Expand Down
Loading

0 comments on commit 7981d97

Please sign in to comment.