/*
Copyright 2009 Olaf Hartig
This file is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
This file is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details. You find the GNU General Public License at
<http://www.gnu.org/licenses/>.
*/
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import arq.cmdline.ArgDecl;
import arq.cmdline.ArgModuleGeneral;
import arq.cmdline.CmdArgModule;
import arq.cmdline.CmdGeneral;

import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.shared.JenaException;
import com.hp.hpl.jena.sparql.util.Utils;
import com.hp.hpl.jena.util.FileManager;

import de.fuberlin.wiwiss.ng4j.NamedGraph;
import de.fuberlin.wiwiss.ng4j.NamedGraphSet;
import de.fuberlin.wiwiss.ng4j.impl.NamedGraphImpl;
import de.fuberlin.wiwiss.ng4j.impl.NamedGraphSetImpl;
/**
* Creates a sliced graphset from a dataset generated by the Berlin SPARQL
* Benchmark (BSBM).
* The created graphset provides a sliced view of the original dataset as if
* the original had been retrieved using linked data principles. Thus, the
* created graphset contains single RDF graphs for each product type, product
* feature, producer, product, vendor, offer, person, and review in the BSBM-
* generated dataset. To slice the dataset this program simply queries the
* dataset for all relevant resources, issues a DESCRIBE query for each of
* these resources, and adds the graphs resulting from the DESCRIBE queries
* to a set of named graphs.
*
* To compile this program you need NG4J (including Jena and ARQ provided with
* NG4J).
*
* Version: 1.0
* @author Olaf Hartig
*/
public class convert extends CmdGeneral
{
    // Namespace declarations shared by all SELECT queries below. A SPARQL
    // PREFIX declaration must carry an IRI reference, otherwise the query
    // does not parse.
    private static final String PREFIX_RDF =
        "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n";
    // NOTE(review): BSBM vocabulary namespace, matching the "/instances/"
    // resource URIs rewritten in addGraphsFromQueryResults -- confirm against
    // the generator version that produced the dataset.
    private static final String PREFIX_BSBM =
        "PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/> \n";
    private static final String PREFIX_FOAF =
        "PREFIX foaf: <http://xmlns.com/foaf/0.1/> \n";

    /** The parameter module that provides the input dataset and the output stream. */
    final protected ModConvert modConvert = new ModConvert ();

    /** The set of named graphs that is built up by {@link #exec()}. */
    final private NamedGraphSet ngs = new NamedGraphSetImpl ();

    /** The sum of the sizes of all graphs added to {@link #ngs} so far. */
    private long countTriples;


    // initialization

    /**
     * Creates the command and registers its parameter module.
     *
     * @param args the command line arguments
     */
    protected convert ( String[] args )
    {
        super( args );
        addModule( modConvert );
    }

    /**
     * Program entry point.
     *
     * @param args the command line arguments
     */
    static public void main ( String [] args )
    {
        new convert( args ).mainRun();
    }


    // implementation of the CmdArgModule interface

    /** Nothing to do here; all arguments are processed by {@link ModConvert}. */
    protected void processModulesAndArgs ()
    {
    }


    // implementation of the CmdMain interface

    /**
     * Builds one named graph per relevant resource, prints a summary, and
     * writes the resulting graphset in TriG syntax to the configured output.
     *
     * @throws JenaException if flushing or closing the output stream fails
     */
    final protected void exec ()
    {
        ngs.clear();
        countTriples = 0;

        createGraphsForProductTypes();
        createGraphsForProductFeatures();
        createGraphsForProducers();
        createGraphsForProducts();
        createGraphsForVendors();
        createGraphsForOffers();
        createGraphsForPersons();
        createGraphsForReviews();

        OutputStream out = modConvert.getOutputStream();
        boolean toStdout = ( out == System.out );

        // When the serialized graphset goes to stdout the summary must not
        // be mixed into it; print it to stderr in that case.
        String summary = String.valueOf(countTriples) + " triples in " + ngs.countGraphs() + " graphs";
        ( toStdout ? System.err : System.out ).println( summary );

        try {
            try {
                ngs.write( out, "TRIG", null );
                out.flush();
            }
            finally {
                // Never close System.out; a file stream opened by ModConvert
                // has to be closed so that all data reaches the disk.
                if ( ! toStdout ) {
                    out.close();
                }
            }
        }
        catch ( IOException e ) {
            throw new JenaException( "Writing the converted dataset failed: " + e.getMessage(), e );
        }
    }

    /** @return the name of this command as reported by {@code Utils.className} */
    protected String getCommandName ()
    {
        return Utils.className( this );
    }


    // implementation of the CmdGeneral interface

    /** @return a one-line usage summary of this command */
    protected String getSummary ()
    {
        return getCommandName() + " --dataset=<file> [--output=<file>]";
    }


    // helpers

    /** Adds one named graph for each resource of type bsbm:ProductType. */
    protected void createGraphsForProductTypes ()
    {
        addGraphsFromQueryResults( selectInstancesQuery(PREFIX_BSBM, "t", "bsbm:ProductType"), "t" );
    }

    /** Adds one named graph for each resource of type bsbm:ProductFeature. */
    protected void createGraphsForProductFeatures ()
    {
        addGraphsFromQueryResults( selectInstancesQuery(PREFIX_BSBM, "f", "bsbm:ProductFeature"), "f" );
    }

    /** Adds one named graph for each resource of type bsbm:Producer. */
    protected void createGraphsForProducers ()
    {
        addGraphsFromQueryResults( selectInstancesQuery(PREFIX_BSBM, "p", "bsbm:Producer"), "p" );
    }

    /** Adds one named graph for each resource of type bsbm:Product. */
    protected void createGraphsForProducts ()
    {
        addGraphsFromQueryResults( selectInstancesQuery(PREFIX_BSBM, "p", "bsbm:Product"), "p" );
    }

    /** Adds one named graph for each resource of type bsbm:Vendor. */
    protected void createGraphsForVendors ()
    {
        addGraphsFromQueryResults( selectInstancesQuery(PREFIX_BSBM, "v", "bsbm:Vendor"), "v" );
    }

    /** Adds one named graph for each resource of type bsbm:Offer. */
    protected void createGraphsForOffers ()
    {
        addGraphsFromQueryResults( selectInstancesQuery(PREFIX_BSBM, "o", "bsbm:Offer"), "o" );
    }

    /** Adds one named graph for each resource of type foaf:Person. */
    protected void createGraphsForPersons ()
    {
        addGraphsFromQueryResults( selectInstancesQuery(PREFIX_FOAF, "p", "foaf:Person"), "p" );
    }

    /** Adds one named graph for each resource of type bsbm:Review. */
    protected void createGraphsForReviews ()
    {
        addGraphsFromQueryResults( selectInstancesQuery(PREFIX_BSBM, "r", "bsbm:Review"), "r" );
    }

    /**
     * Builds the SELECT query that binds the given variable to every
     * resource having the given rdf:type.
     *
     * @param classPrefixDecl the PREFIX declaration needed for classQName
     * @param varName the name of the result variable (without '?')
     * @param classQName the prefixed name of the class, e.g. "bsbm:Offer"
     * @return the query string
     */
    private static String selectInstancesQuery ( String classPrefixDecl, String varName, String classQName )
    {
        return PREFIX_RDF +
               classPrefixDecl +
               "SELECT ?" + varName + " \n" +
               "WHERE { \n" +
               " ?" + varName + " rdf:type " + classQName + " . \n" +
               "}";
    }

    /**
     * Executes the given SELECT query and, for each URI bound to the given
     * variable, adds a named graph holding the DESCRIBE result for that URI.
     * The graph name is the resource URI with "/instances/" replaced by
     * "/data/".
     *
     * @param query a SELECT query over the BSBM dataset
     * @param varName the name of the variable whose bindings are used
     */
    protected void addGraphsFromQueryResults ( String query, String varName )
    {
        Iterator<String> it = execSingleElmtQuery( query, varName );
        while ( it.hasNext() )
        {
            String resourceURI = it.next();
            if ( resourceURI == null ) {
                // getURI() yields null for blank nodes; they cannot be
                // described by URI, so there is nothing to slice out.
                continue;
            }

            String graphURI = resourceURI.replace( "/instances/", "/data/" );
            NamedGraph ng = new NamedGraphImpl( graphURI, describe(resourceURI).getGraph() );
            ngs.addGraph( ng );
            countTriples += ng.size();
        }
    }

    /**
     * Executes the given SELECT query over the BSBM dataset and returns the
     * URIs bound to the given variable.
     * All bindings are collected before this method returns so that the
     * query execution can be closed here.
     *
     * @param query a SELECT query
     * @param varName the name of the variable whose bindings are returned
     * @return an iterator over the bound URIs, in result order
     */
    protected Iterator<String> execSingleElmtQuery ( String query, String varName )
    {
        QueryExecution qe = QueryExecutionFactory.create( query, modConvert.getBSBMDataset() );
        try {
            Iterator<String> bindings = new SingleBindingIterator( qe.execSelect(), varName );
            List<String> uris = new ArrayList<String>();
            while ( bindings.hasNext() ) {
                uris.add( bindings.next() );
            }
            return uris.iterator();
        }
        finally {
            qe.close();
        }
    }

    /**
     * Issues a DESCRIBE query for the given URI over the BSBM dataset.
     *
     * @param uri the URI of the resource to describe
     * @return the model produced by the DESCRIBE query
     */
    protected Model describe ( String uri )
    {
        QueryExecution qe = QueryExecutionFactory.create( "DESCRIBE <" + uri + ">", modConvert.getBSBMDataset() );
        try {
            return qe.execDescribe();
        }
        finally {
            qe.close();
        }
    }


    /** The parameter module for this program. */
    static class ModConvert implements ArgModuleGeneral
    {
        final protected ArgDecl datasetDecl = new ArgDecl( ArgDecl.HasValue, "dataset" );
        final protected ArgDecl outputDecl = new ArgDecl( ArgDecl.HasValue, "output" );

        /** The loaded input dataset; set by {@link #processArgs}. */
        private Model bsbmDataset;

        /** The stream the converted dataset is written to; set by {@link #processArgs}. */
        private OutputStream out;

        // implementation of the ArgModuleGeneral interface

        /** Registers the --dataset and --output arguments with the given command. */
        public void registerWith ( CmdGeneral cmdLine )
        {
            cmdLine.getUsage().startCategory( "Dataset Conversion" );
            cmdLine.add( datasetDecl,
                         "--dataset",
                         "File containing the benchmark dataset as generated by BSBM (mandatory)") ;
            cmdLine.add( outputDecl,
                         "--output",
                         "The filename for the converted dataset (optional; print to stdout if omitted)") ;
        }

        // implementation of the ArgModule interface

        /**
         * Loads the dataset named by --dataset and opens the file named by
         * --output (or selects stdout if --output is absent). Problems are
         * reported through {@code cmdLine.cmdError}.
         */
        public void processArgs ( CmdArgModule cmdLine )
        {
            bsbmDataset = null;
            if ( cmdLine.contains(datasetDecl) )
            {
                String fileName = cmdLine.getValue( datasetDecl );
                try {
                    bsbmDataset = FileManager.get().loadModel( fileName );
                } catch ( JenaException e ) {
                    cmdLine.cmdError( "Loading the given dataset file failed (" + Utils.className(e) + ": " + e.getMessage() + ")." );
                }
            }
            else {
                cmdLine.cmdError( "No dataset specified" );
            }

            if ( cmdLine.contains(outputDecl) )
            {
                File outputFile = new File( cmdLine.getValue(outputDecl) );
                try {
                    out = new FileOutputStream( outputFile );
                } catch ( FileNotFoundException e ) {
                    cmdLine.cmdError( "Writing to the given output file is impossible: " + e.getMessage() );
                }
            }
            else {
                out = System.out;
            }
        }

        // accessors

        /** @return the dataset loaded from the file given by --dataset */
        public Model getBSBMDataset ()
        {
            return bsbmDataset;
        }

        /** @return the stream for the converted dataset (a file stream or stdout) */
        public OutputStream getOutputStream ()
        {
            return out;
        }
    }


    /**
     * An iterator that returns the URIs bound to a given variable in a set of
     * query results.
     */
    static class SingleBindingIterator implements Iterator<String>
    {
        final private ResultSet results;
        final private String varName;

        public SingleBindingIterator ( ResultSet results, String varName )
        {
            this.results = results;
            this.varName = varName;
        }

        public boolean hasNext ()
        {
            return results.hasNext();
        }

        /** @return the URI of the resource bound to the variable in the next solution */
        public String next ()
        {
            return results.nextSolution().getResource(varName).getURI();
        }

        /** Not supported. */
        public void remove ()
        {
            throw new UnsupportedOperationException();
        }
    }
}