/* Copyright 2009 Olaf Hartig This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You find the GNU General Public License at . */ import java.io.File; import java.io.FileOutputStream; import java.io.FileNotFoundException; import java.io.OutputStream; import java.util.Iterator; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.shared.JenaException; import com.hp.hpl.jena.util.FileManager; import arq.cmdline.ArgDecl; import arq.cmdline.ArgModuleGeneral; import arq.cmdline.CmdArgModule; import arq.cmdline.CmdGeneral; import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.sparql.util.Utils; import de.fuberlin.wiwiss.ng4j.NamedGraph; import de.fuberlin.wiwiss.ng4j.NamedGraphSet; import de.fuberlin.wiwiss.ng4j.impl.NamedGraphImpl; import de.fuberlin.wiwiss.ng4j.impl.NamedGraphSetImpl; /** * Creates a sliced graphset from a dataset generated by the Berlin SPARQL * Benchmark (BSBM). * The created graphset provides a sliced view of the original dataset as if * the original has been retrieved using linked data principles. Thus, the * created graphset contains single RDF graphs for each product type, product * feature, producer, product, vendor, offer, person, and review in the BSBM- * generated dataset. To slice the dataset this program simply queries the * dataset for all relevant resources, issues a DESCRIBE query for each of * these resources, and adds the graphs resulting from the DESCRIBE queries * to a set of named graphs. * * To compile this program you need NG4J (including Jena and ARQ provided with * NG4J). * * Version: 1.0 * @author Olaf Hartig */ public class convert extends CmdGeneral { final protected ModConvert modConvert = new ModConvert (); final private NamedGraphSet ngs = new NamedGraphSetImpl (); private long countTriples; // initialization protected convert ( String[] args ) { super( args ); addModule( modConvert ); } static public void main ( String [] args ) { new convert( args ).mainRun(); } // implementation of the CmdArgModule interface protected void processModulesAndArgs () { } // implementation of the CmdMain interface final protected void exec () { ngs.clear(); countTriples = 0; createGraphsForProductTypes(); createGraphsForProductFeatures(); createGraphsForProducers(); createGraphsForProducts(); createGraphsForVendors(); createGraphsForOffers(); createGraphsForPersons(); createGraphsForReviews(); System.out.println( String.valueOf(countTriples) + " triples in " + ngs.countGraphs() + " graphs" ); ngs.write( modConvert.getOutputStream(), "TRIG", null ); } protected String getCommandName () { return Utils.className( this ); } // implementation of the CmdGeneral interface protected String getSummary () { return getCommandName() + " --dataset= [--output=]"; } // helpers protected void createGraphsForProductTypes () { String query = "PREFIX rdf: \n" + "PREFIX bsbm: \n" + "SELECT ?t \n" + "WHERE { \n" + " ?t rdf:type bsbm:ProductType . \n" + "}"; addGraphsFromQueryResults( query, "t" ); } protected void createGraphsForProductFeatures () { String query = "PREFIX rdf: \n" + "PREFIX bsbm: \n" + "SELECT ?f \n" + "WHERE { \n" + " ?f rdf:type bsbm:ProductFeature . \n" + "}"; addGraphsFromQueryResults( query, "f" ); } protected void createGraphsForProducers () { String query = "PREFIX rdf: \n" + "PREFIX bsbm: \n" + "SELECT ?p \n" + "WHERE { \n" + " ?p rdf:type bsbm:Producer . \n" + "}"; addGraphsFromQueryResults( query, "p" ); } protected void createGraphsForProducts () { String query = "PREFIX rdf: \n" + "PREFIX bsbm: \n" + "SELECT ?p \n" + "WHERE { \n" + " ?p rdf:type bsbm:Product . \n" + "}"; addGraphsFromQueryResults( query, "p" ); } protected void createGraphsForVendors () { String query = "PREFIX rdf: \n" + "PREFIX bsbm: \n" + "SELECT ?v \n" + "WHERE { \n" + " ?v rdf:type bsbm:Vendor . \n" + "}"; addGraphsFromQueryResults( query, "v" ); } protected void createGraphsForOffers () { String query = "PREFIX rdf: \n" + "PREFIX bsbm: \n" + "SELECT ?o \n" + "WHERE { \n" + " ?o rdf:type bsbm:Offer . \n" + "}"; addGraphsFromQueryResults( query, "o" ); } protected void createGraphsForPersons () { String query = "PREFIX rdf: \n" + "PREFIX foaf: \n" + "SELECT ?p \n" + "WHERE { \n" + " ?p rdf:type foaf:Person . \n" + "}"; addGraphsFromQueryResults( query, "p" ); } protected void createGraphsForReviews () { String query = "PREFIX rdf: \n" + "PREFIX bsbm: \n" + "SELECT ?r \n" + "WHERE { \n" + " ?r rdf:type bsbm:Review . \n" + "}"; addGraphsFromQueryResults( query, "r" ); } protected void addGraphsFromQueryResults ( String query, String varName ) { Iterator it = execSingleElmtQuery( query, varName ); while ( it.hasNext() ) { String resourceURI = it.next(); String graphURI = resourceURI.replace( "/instances/", "/data/" ); NamedGraph ng = new NamedGraphImpl( graphURI, describe(resourceURI).getGraph() ); ngs.addGraph( ng ); countTriples += ng.size(); } } protected Iterator execSingleElmtQuery ( String query, String varName ) { QueryExecution qe = QueryExecutionFactory.create( query, modConvert.getBSBMDataset() ); ResultSet results = qe.execSelect(); return new SingleBindingIterator ( results, varName ); } protected Model describe ( String uri ) { QueryExecution qe = QueryExecutionFactory.create( "DESCRIBE <" + uri + ">", modConvert.getBSBMDataset() ); return qe.execDescribe(); } /** The parameter module for this program. */ static class ModConvert implements ArgModuleGeneral { final protected ArgDecl datasetDecl = new ArgDecl( ArgDecl.HasValue, "dataset" ); final protected ArgDecl outputDecl = new ArgDecl( ArgDecl.HasValue, "output" ); private Model bsbmDataset; private OutputStream out; // implementation of the ArgModuleGeneral interface public void registerWith ( CmdGeneral cmdLine ) { cmdLine.getUsage().startCategory( "Dataset Conversion" ); cmdLine.add( datasetDecl, "--dataset", "File containing the benchmark dataset as generated by BSBM (mandatory)") ; cmdLine.add( outputDecl, "--output", "The filename for the converted dataset (optional; print to stdout if omitted)") ; } // implementation of the ArgModule interface public void processArgs ( CmdArgModule cmdLine ) { bsbmDataset = null; if ( cmdLine.contains(datasetDecl) ) { String fileName = cmdLine.getValue( datasetDecl ); try { bsbmDataset = FileManager.get().loadModel( fileName ); } catch ( JenaException e ) { cmdLine.cmdError( "Loading the given dataset file failed (" + Utils.className(e) + ": " + e.getMessage() + ")." ); } } else { cmdLine.cmdError( "No dataset specified" ); } if ( cmdLine.contains(outputDecl) ) { File outputFile = new File( cmdLine.getValue(outputDecl) ); try { out = new FileOutputStream( outputFile ); } catch ( FileNotFoundException e ) { cmdLine.cmdError( "Writing to the given output file is impossible: " + e.getMessage() ); } } else { out = System.out; } } // accessors public Model getBSBMDataset () { return bsbmDataset; } public OutputStream getOutputStream () { return out; } } /** * An iterator that returns the URIs bound to a given variable in a set of * query results. */ static class SingleBindingIterator implements Iterator { final private ResultSet results; final private String varName; public SingleBindingIterator ( ResultSet results, String varName ) { this.results = results; this.varName = varName; } public boolean hasNext () { return results.hasNext(); } public String next () { return results.nextSolution().getResource(varName).getURI(); } public void remove () { throw new UnsupportedOperationException(); } } }