Commit 410760f0 authored by Marshall McDonnell

Re #475 - Prepare BATS for migration to ICE

Signed-off-by: Marshall McDonnell <mcdonnellmt@ornl.gov>
parent b0b41691
# Created by https://www.toptal.com/developers/gitignore/api/java,eclipse
# Edit at https://www.toptal.com/developers/gitignore?templates=java,eclipse
### Eclipse ###
.metadata
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.settings/
.loadpath
.recommenders
# External tool builders
.externalToolBuilders/
# Locally stored "Eclipse launch configurations"
*.launch
# PyDev specific (Python IDE for Eclipse)
*.pydevproject
# CDT-specific (C/C++ Development Tooling)
.cproject
# CDT- autotools
.autotools
# Java annotation processor (APT)
.factorypath
# PDT-specific (PHP Development Tools)
.buildpath
# sbteclipse plugin
.target
# Tern plugin
.tern-project
# TeXlipse plugin
.texlipse
# STS (Spring Tool Suite)
.springBeans
# Code Recommenders
.recommenders/
# Annotation Processing
.apt_generated/
.apt_generated_test/
# Scala IDE specific (Scala & Java development for Eclipse)
.cache-main
.scala_dependencies
.worksheet
# Uncomment this line if you wish to ignore the project description file.
# Typically, this file would be tracked if it contains build/dependency configurations:
#.project
### Eclipse Patch ###
# Spring Boot Tooling
.sts4-cache/
### Java ###
# Compiled class file
*.class
# Log file
*.log
# BlueJ files
*.ctxt
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.jar
*.war
*.nar
*.ear
*.zip
*.tar.gz
*.rar
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
# End of https://www.toptal.com/developers/gitignore/api/java,eclipse
target
# BATS
BATS, the Basic Artifact Tracking System, is a simple data management service for managing scientific data.
## Build Instructions
### Prerequisites
BATS requires a full installation of Docker for building, executing, and storing images.
### Detailed Build Instructions
See each subfolder for build instructions.
## How BATS got its name
I had a discussion with my daughter, 17 months old at the time, about her favorite animal. She picked the moose, but since that is already taken by several projects we settled on her second favorite animal, the bat. I then back-ronymed a name out of it.
<project xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Maven coordinates of the BATS module -->
    <groupId>org.eclipse.ice</groupId>
    <artifactId>org.eclipse.ice.bats</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>Eclipse ICE BATS</name>
    <description>Basic Artifact Tracking System</description>

    <properties>
        <!-- Compile for Java 8 and read sources as UTF-8 -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <build>
        <plugins>
            <!-- Runs the integration tests and fails the build in the verify phase -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-failsafe-plugin</artifactId>
                <version>2.22.1</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>integration-test</goal>
                            <goal>verify</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <!-- Builds the Fuseki image and runs it around the integration tests -->
            <plugin>
                <groupId>io.fabric8</groupId>
                <artifactId>docker-maven-plugin</artifactId>
                <version>0.34.1</version>
                <configuration>
                    <images>
                        <image>
                            <name>bats-fuseki:latest</name>
                            <build>
                                <contextDir>${project.basedir}/src/main/docker</contextDir>
                                <dockerFile>Dockerfile.fuseki</dockerFile>
                            </build>
                            <run>
                                <!-- NOTE(review): host network mode is also requested below;
                                     Docker normally ignores published ports in that mode, so
                                     confirm whether this mapping is still needed. -->
                                <ports>
                                    <port>3030:3030</port>
                                </ports>
                                <!-- Host directory mounted at the TDB location used by dataset.ttl -->
                                <volumes>
                                    <bind>
                                        <volume>/opt/fuseki-TDB:/data/TDB</volume>
                                    </bind>
                                </volumes>
                                <network>
                                    <mode>host</mode>
                                </network>
                                <!-- Block until the Fuseki HTTP endpoint answers -->
                                <wait>
                                    <url>http://localhost:3030</url>
                                </wait>
                            </run>
                        </image>
                    </images>
                </configuration>
                <executions>
                    <!-- Start the container before the integration tests run -->
                    <execution>
                        <id>prepare-fuseki-database</id>
                        <phase>pre-integration-test</phase>
                        <goals>
                            <goal>start</goal>
                        </goals>
                    </execution>
                    <!-- Stop the container once the integration tests are done -->
                    <execution>
                        <id>remove-fuseki-database</id>
                        <phase>post-integration-test</phase>
                        <goals>
                            <goal>stop</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.jena</groupId>
            <artifactId>apache-jena-libs</artifactId>
            <type>pom</type>
            <version>3.7.0</version>
        </dependency>
        <dependency>
            <groupId>org.topbraid</groupId>
            <artifactId>shacl</artifactId>
            <version>1.1.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-core -->
        <!-- Kept at 2.17.1 or later: earlier 2.x releases are affected by
             CVE-2021-44228 (Log4Shell) and its follow-up CVEs. 2.17.1 still
             supports Java 8. -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.17.1</version>
        </dependency>
    </dependencies>
</project>
# Image that runs an Apache Jena Fuseki server preconfigured with dataset.ttl.
FROM openjdk:11.0.1-jre

# Specify Fuseki variables
ARG FUSEKI_VERSION=3.16.0
ARG FUSEKI_NAME=apache-jena-fuseki
ARG FUSEKI_DOWNLOAD_FILE=$FUSEKI_NAME-$FUSEKI_VERSION.tar.gz

# Install Fuseki.
# The Apache archive is used because it keeps every release available, whereas
# the regular download mirrors only carry the most recent version and so break
# builds that pin FUSEKI_VERSION. The tarball is deleted after extraction so it
# does not stay in the image layer.
RUN wget https://archive.apache.org/dist/jena/binaries/$FUSEKI_DOWNLOAD_FILE && \
    tar -xzvf $FUSEKI_DOWNLOAD_FILE && \
    rm $FUSEKI_DOWNLOAD_FILE && \
    mv $FUSEKI_NAME-$FUSEKI_VERSION /opt/$FUSEKI_NAME && \
    mkdir -p /opt/$FUSEKI_NAME/run/configuration

# Install the service/dataset configuration into Fuseki's configuration directory
COPY dataset.ttl /opt/$FUSEKI_NAME/run/configuration/dataset.ttl

# Expose the Fuseki port
EXPOSE 3030

# Execute Fuseki from the installation directory
WORKDIR /opt/$FUSEKI_NAME
ENTRYPOINT ["./fuseki-server"]
# Fuseki configuration for the BATS data set: one service backed by a TDB
# store, exposed through an inference model.
@prefix : <http://base/#> .
@prefix tdb: <http://jena.hpl.hp.com/2008/tdb#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix fuseki: <http://jena.apache.org/fuseki#> .
# The service published under the name "bats-dataset", with its query,
# graph store (read-only and read-write), update and upload endpoints.
:service_tdb_all a fuseki:Service ;
fuseki:dataset :dataset ;
fuseki:name "bats-dataset" ;
fuseki:serviceQuery "query" , "sparql" ;
fuseki:serviceReadGraphStore "get" ;
fuseki:serviceReadWriteGraphStore "data" ;
fuseki:serviceUpdate "update" ;
fuseki:serviceUpload "upload" .
# The dataset served above; its default graph is the inference model below.
:dataset a ja:RDFDataset ;
ja:defaultGraph <#model_inf> ;
.
# Inference model layered over the stored graph using the reasoner identified
# by the OWLFBRuleReasoner URL.
<#model_inf> a ja:InfModel ;
ja:baseModel <#graph> ;
ja:reasoner [
ja:reasonerURL <http://jena.hpl.hp.com/2003/OWLFBRuleReasoner>
] .
# The base graph, taken from the TDB dataset declared below.
<#graph> rdf:type tdb:GraphTDB ;
tdb:dataset :tdb_dataset_readwrite .
# On-disk TDB dataset stored at /data/TDB.
:tdb_dataset_readwrite
a tdb:DatasetTDB ;
tdb:location "/data/TDB"
.
/******************************************************************************
* Copyright (c) 2019- UT-Battelle, LLC.
* All rights reserved. This program and the accompanying materials are made
* available under the terms of the Eclipse Public License v1.0,
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Initial API and implementation and/or initial documentation -
* Jay Jay Billings
*****************************************************************************/
package org.eclipse.ice.bats;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import org.apache.http.Consts;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.message.BasicNameValuePair;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.ReadWrite;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdfconnection.RDFConnectionFuseki;
import org.apache.jena.rdfconnection.RDFConnectionRemoteBuilder;
import org.apache.jena.riot.Lang;
import org.apache.jena.update.Update;
import org.apache.jena.util.FileManager;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.core.Logger;
/**
 * This class represents a set of data describing a topic or item of interest.
 * In BATS, data sets are natively distributed across one or more servers. The
 * initial hostname and port of an Apache Jena Fuseki server must be provided in
 * order to pull the root RDF model that describes this data set, as well as any
 * associated metadata models.
 *
 * Data sets are organized in the Apache Jena style, with each set containing
 * one or more subsets called "Models." This class largely wraps those
 * operations into a more convenient interface that masks Jena's HTTP-based
 * transfer routines, and fits the intended use better. However, advanced users
 * may retrieve the Jena dataset by calling getJenaDataset().
 *
 * By default, DataSet only creates Jena TDB2 persistent triple stores on the
 * remote server for RDF models. Instances do not hold copies or handles to any
 * data that they represent because the size of the data is not known in
 * advance. Instead, this class interacts directly with the remote triple store.
 *
 * @author Jay Jay Billings
 *
 */
public class DataSet {

    /**
     * This is the default name used as the base for all unnamed instances of
     * DataSet.
     */
    public static final String DEFAULT_NAME = "unnamed-dataset";

    /**
     * Log utility
     */
    protected static final org.apache.logging.log4j.Logger logger = LogManager.getLogger(DataSet.class.getName());

    /**
     * The default host which holds the dataset.
     */
    private String host = "http://localhost";

    /**
     * The default port of the host which holds the dataset.
     */
    private int port = 3030;

    /**
     * The default name for a dataset.
     */
    private String name = DEFAULT_NAME;

    /**
     * This operation sets the name of the data set. The name of the data set is the
     * name recognized by the host, not the local machine. It must be set prior to
     * calling create() or load(), but calling it after those operations does not
     * change it.
     *
     * @param name the name of the data set as it is known on the remote host
     */
    public void setName(final String name) {
        this.name = name;
    }

    /**
     * This operation returns the name of the data set.
     *
     * @return the name
     */
    public String getName() {
        return name;
    }

    /**
     * This operation returns the host of the data set.
     *
     * @return the host
     */
    public String getHost() {
        return host;
    }

    /**
     * This operation sets the host at which the data set should be created or from
     * which it should be loaded.
     *
     * @param host the URI of the remote Fuseki host that hosts the data set
     */
    public void setHost(final String host) {
        this.host = host;
    }

    /**
     * This operation returns the port of the host of this data set.
     *
     * @return the port
     */
    public int getPort() {
        return port;
    }

    /**
     * This operation sets the expected port of the host of this data set.
     *
     * @param port the port on which the remote Fuseki host listens
     */
    public void setPort(final int port) {
        this.port = port;
    }

    /**
     * This operation returns the full URI identifying this data set on the remote
     * server, including hostname, port, and set name.
     *
     * @return the full URI including all parts
     */
    public String getFullURI() {
        return getHost() + ":" + getPort() + "/" + getName();
    }

    /**
     * This operation creates a dataset with the given name. If no name is provided
     * to setName(), the default name with a UUID appended to it will be used such
     * that the form of the name will be "unnamed-dataset_<UUID>." Note that
     * creation does not imply retrieval, and that the getRootModel() or getModel()
     * functions still need to be called. Likewise (and obviously), if the model
     * already exists on the remote server it can just be retrieved without calling
     * create().
     *
     * A response from the server with a non-2xx status is reported in the error
     * log; it does not raise an exception.
     *
     * @throws Exception this exception is thrown if the request to create the data
     *                   set cannot be sent or the connection cannot be released.
     */
    public void create() throws Exception {
        // Unnamed data sets get a unique suffix. The comparison is by value so
        // that a default name supplied through setName() is recognized as well.
        if (DEFAULT_NAME.equals(name)) {
            name += "_" + UUID.randomUUID().toString();
        }
        final String dbName = name;
        // Per the spec, always use tdb2.
        final String dbType = "tdb2";

        // Connect the HTTP client
        final HttpClient client = HttpClientBuilder.create().build();
        try {
            final String fusekiLocation = host + ":" + port + "/";
            final String fusekiDataAPILoc = "$/datasets";
            final HttpPost post = new HttpPost(fusekiLocation + fusekiDataAPILoc);

            // Add the database parameters into the form with UTF_8 encoding.
            final List<NameValuePair> form = new ArrayList<NameValuePair>();
            form.add(new BasicNameValuePair("dbName", dbName));
            form.add(new BasicNameValuePair("dbType", dbType));
            post.setEntity(new UrlEncodedFormEntity(form, Consts.UTF_8));

            // Create the data set
            final HttpResponse response = client.execute(post);
            logger.debug(response.toString());

            // Surface rejected requests instead of letting them pass unnoticed.
            final int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                logger.error("Request to create data set {} at {} was answered with: {}", dbName, fusekiLocation,
                        response.getStatusLine());
            }
        } finally {
            // The HttpClient interface has no close(); clients that are
            // Closeable must still be closed to release their connections.
            if (client instanceof java.io.Closeable) {
                ((java.io.Closeable) client).close();
            }
        }
    }

    /**
     * This operation directs the data set to update and persist any remotely stored
     * versions of this model with this version of the model. This action is a
     * complete re-write of the data, without a merge or any checks. Failures are
     * written to the error log rather than thrown.
     *
     * @param modelName the name of the model that will be updated
     * @param model     the model that will be updated remotely
     */
    public void updateModel(final String modelName, Model model) {
        RDFConnectionRemoteBuilder uploadConnBuilder = RDFConnectionFuseki.create()
                .destination(getFullURI() + "/data");

        // Open a connection to upload the model.
        try (RDFConnectionFuseki uploadConn = (RDFConnectionFuseki) uploadConnBuilder.build()) {
            // Note that transactions must proceed with begin(), some operation(), and
            // commit().
            uploadConn.begin(ReadWrite.WRITE);
            // Parameterized so the model is only rendered when debug logging is on.
            logger.debug("Uploading model {}: {}", modelName, model);
            uploadConn.put(modelName, model);
            uploadConn.commit();
            logger.debug("Committed model {} to data set {}", modelName, getName());
        } catch (Exception e) {
            logger.error("Unable to update model " + modelName + " in data set " + getName()
                    + " on the remote Fuseki server.", e);
        }
    }

    /**
     * This operation returns the root model in the data set, which is called the
     * default graph in the Jena jargon. It is referred to as the root model here to
     * denote that it is the root model in a hierarchy of models describing the same
     * set. This is a convenience method identically equal to calling getModel(null)
     * or getModel("default").
     *
     * @return the root model if the data set exists, otherwise null
     */
    public Model getRootModel() {
        return getModel(null);
    }

    /**
     * This operation returns the model with the given name if it exists in the data
     * set. Failures are written to the error log rather than thrown.
     *
     * @param modelName the name of the model that should be retrieved from the data
     *                  set. Note that like Jena, calling with an argument of
     *                  "default" or "null" will return the default graph/model.
     * @return the model if it exists in the data set, otherwise null
     */
    public Model getModel(final String modelName) {
        Model model = null;
        RDFConnectionRemoteBuilder getConnBuilder = RDFConnectionFuseki.create()
                .destination(getFullURI() + "/data");

        try (RDFConnectionFuseki getConn = (RDFConnectionFuseki) getConnBuilder.build()) {
            getConn.begin(ReadWrite.READ);
            model = getConn.fetch(modelName);
            getConn.commit();
            logger.debug("Retrieved model {} from data set {}", modelName, getName());
        } catch (Exception e) {
            logger.error("Unable to find model " + modelName + " in data set " + getName(), e);
        }

        return model;
    }

    /**
     * This operation returns the raw Jena data set pulled from Fuseki. This could
     * be a long-running operation depending on the size of the remote data. This
     * operation is intended purely as a convenience to advanced users who want to
     * manipulate the data set directly. Failures are written to the error log
     * rather than thrown.
     *
     * @return the raw Jena data set, or null if it could not be retrieved
     */
    public Dataset getJenaDataset() {
        Dataset set = null;
        RDFConnectionRemoteBuilder getConnBuilder = RDFConnectionFuseki.create()
                .destination(getFullURI() + "/get");

        try (RDFConnectionFuseki getConn = (RDFConnectionFuseki) getConnBuilder.build()) {
            getConn.begin(ReadWrite.READ);
            set = getConn.fetchDataset();
            getConn.commit();
            logger.debug("Retrieved data set {}", getName());
        } catch (Exception e) {
            logger.error("Unable to find data set " + getName(), e);
        }

        return set;
    }
}
/******************************************************************************
* Copyright (c) 2019- UT-Battelle, LLC.
* All rights reserved. This program and the accompanying materials are made
* available under the terms of the Eclipse Public License v1.0,
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Initial API and implementation and/or initial documentation -
* Jay Jay Billings
*****************************************************************************/
package org.eclipse.ice.tests.bats;
import static org.junit.Assert.*;
import java.util.UUID;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.ReadWrite;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdfconnection.RDFConnectionFuseki;
import org.apache.jena.rdfconnection.RDFConnectionRemoteBuilder;
import org.junit.BeforeClass;
import org.junit.Test;
import org.eclipse.ice.bats.DataSet;
/**
* This is a simple test of the BATS Dataset class. It requires that the Fuseki