Skip to content

Commit 7cbd593

Browse files
committed
Pull request #277: Development
Merge in ITB/shacl-validator from development to master. * commit '4b8996ab7a972f822ff2e128a922154acdea8ff6': Caching of materialised shape models where possible to improve performance and memory consumption; reduced logging noise.
2 parents e8b13c6 + 4b8996a commit 7cbd593

File tree

3 files changed

+148
-59
lines changed

3 files changed

+148
-59
lines changed

shaclvalidator-common/src/main/java/eu/europa/ec/itb/shacl/validation/FileManager.java

Lines changed: 138 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,16 @@
1515

1616
package eu.europa.ec.itb.shacl.validation;
1717

18+
import com.apicatalog.jsonld.JsonLdOptions;
19+
import com.apicatalog.jsonld.http.DefaultHttpClient;
20+
import com.apicatalog.jsonld.http.media.MediaType;
21+
import com.apicatalog.jsonld.loader.FileLoader;
22+
import com.apicatalog.jsonld.loader.HttpLoader;
23+
import com.apicatalog.jsonld.loader.SchemeRouter;
1824
import eu.europa.ec.itb.shacl.ApplicationConfig;
1925
import eu.europa.ec.itb.shacl.DomainConfig;
2026
import eu.europa.ec.itb.shacl.SparqlQueryConfig;
27+
import eu.europa.ec.itb.shacl.ValidationSpecs;
2128
import eu.europa.ec.itb.shacl.util.ShaclValidatorUtils;
2229
import eu.europa.ec.itb.validation.commons.BaseFileManager;
2330
import eu.europa.ec.itb.validation.commons.FileInfo;
@@ -33,7 +40,11 @@
3340
import org.apache.jena.rdf.model.Model;
3441
import org.apache.jena.riot.Lang;
3542
import org.apache.jena.riot.RDFLanguages;
43+
import org.apache.jena.riot.RDFParserBuilder;
44+
import org.apache.jena.riot.system.PrefixMap;
45+
import org.apache.jena.riot.system.PrefixMapStd;
3646
import org.apache.jena.sparql.exec.http.QueryExecutionHTTP;
47+
import org.apache.jena.sparql.util.Context;
3748
import org.slf4j.Logger;
3849
import org.slf4j.LoggerFactory;
3950
import org.springframework.beans.factory.annotation.Autowired;
@@ -44,11 +55,16 @@
4455
import java.nio.file.Files;
4556
import java.nio.file.Path;
4657
import java.util.List;
58+
import java.util.Map;
59+
import java.util.Objects;
4760
import java.util.UUID;
4861
import java.util.concurrent.ConcurrentHashMap;
62+
import java.util.concurrent.locks.ReentrantLock;
63+
import java.util.function.Supplier;
4964

5065
import static eu.europa.ec.itb.shacl.util.ShaclValidatorUtils.handleEquivalentContentSyntaxes;
5166
import static eu.europa.ec.itb.shacl.util.ShaclValidatorUtils.isRdfContentSyntax;
67+
import static org.apache.jena.riot.lang.LangJSONLD11.JSONLD_OPTIONS;
5268

5369
/**
5470
* Manages file-system operations.
@@ -62,6 +78,8 @@ public class FileManager extends BaseFileManager<ApplicationConfig> {
6278
private ApplicationConfig appConfig;
6379

6480
private final ConcurrentHashMap<String, Path> shaclModelCache = new ConcurrentHashMap<>();
81+
private final ConcurrentHashMap<String, Model> materialisedShaclModelCache = new ConcurrentHashMap<>();
82+
private final ReentrantLock cacheLock = new ReentrantLock();
6583

6684
/**
6785
* Create a cache key to use for SHACL model file lookup.
@@ -181,12 +199,22 @@ public void writeShaclShapes(Path outputPath, Model rdfModel, String validationT
181199
String outputSyntaxToUse = outputSyntax;
182200
if (domainConfig.canCacheShapes(validationType)) {
183201
// Write the model to a file and cache it.
184-
String cacheKey = toShaclModelCacheKey(domainConfig, validationType, outputSyntax);
185-
Path cachedPath = shaclModelCache.get(cacheKey);
186-
if (cachedPath == null || !Files.exists(cachedPath)) {
187-
// Initialise in case of first access or in case file was removed.
188-
cachedPath = storeShapeGraph(rdfModel, outputSyntaxToUse);
189-
shaclModelCache.put(cacheKey, cachedPath);
202+
Path cachedPath;
203+
cacheLock.lock();
204+
try {
205+
String cacheKey = toShaclModelCacheKey(domainConfig, validationType, outputSyntax);
206+
cachedPath = shaclModelCache.get(cacheKey);
207+
if (cachedPath == null || !Files.exists(cachedPath)) {
208+
// Initialise in case of first access or in case file was removed.
209+
cachedPath = storeShapeGraph(rdfModel, outputSyntaxToUse);
210+
if (logger.isDebugEnabled()) {
211+
logger.debug("Cached shape model for [{}] at [{}]", cacheKey, cachedPath.toAbsolutePath());
212+
}
213+
shaclModelCache.put(cacheKey, cachedPath);
214+
materialisedShaclModelCache.put(cacheKey, rdfModel);
215+
}
216+
} finally {
217+
cacheLock.unlock();
190218
}
191219
Files.copy(cachedPath, outputPath);
192220
} else {
@@ -196,6 +224,110 @@ public void writeShaclShapes(Path outputPath, Model rdfModel, String validationT
196224
}
197225
}
198226

227+
/**
228+
* Build the RDF model from the provided stream.
229+
*
230+
* @param dataStream The stream to read from.
231+
* @param rdfLanguage The content type of the stream's data.
232+
* @return The parsed model.
233+
*/
234+
public Model readModel(InputStream dataStream, Lang rdfLanguage, Map<String, String> nsPrefixes) {
235+
var builder = RDFParserBuilder
236+
.create()
237+
.lang(rdfLanguage)
238+
.source(dataStream);
239+
if (nsPrefixes != null) {
240+
// Before parsing set the prefixes of the model to avoid mismatches.
241+
PrefixMap prefixes = new PrefixMapStd();
242+
prefixes.putAll(nsPrefixes);
243+
builder = builder.prefixes(prefixes);
244+
}
245+
if (Lang.JSONLD11.equals(rdfLanguage) || Lang.JSONLD.equals(rdfLanguage)) {
246+
var options = new JsonLdOptions();
247+
var httpLoader = new HttpLoader(DefaultHttpClient.defaultInstance());
248+
/*
249+
* Set fallback type for remote contexts to avoid errors for non JSON/JSON-LD Content Types.
250+
* This allows us to proceed if e.g. the Content Type originally returned is "text/plain".
251+
*/
252+
httpLoader.setFallbackContentType(MediaType.JSON);
253+
options.setDocumentLoader(new SchemeRouter()
254+
.set("http", httpLoader)
255+
.set("https", httpLoader)
256+
.set("file", new FileLoader()));
257+
builder = builder.context(Context.create().set(JSONLD_OPTIONS, options));
258+
}
259+
return builder.build().toModel();
260+
}
261+
262+
/**
263+
* Add the provided model to the cache if possible.
264+
*
265+
* @param specs The current validation settings.
266+
* @param shapeModel The model to cache.
267+
*/
268+
private void cacheShapeModelIfPossible(ValidationSpecs specs, Model shapeModel) {
269+
if (specs.getDomainConfig().canCacheShapes(specs.getValidationType()) && !specs.isLoadImports()) {
270+
String cacheKey = toShaclModelCacheKey(specs.getDomainConfig(), specs.getValidationType(), specs.getDomainConfig().getDefaultReportSyntax());
271+
cacheLock.lock();
272+
try {
273+
materialisedShaclModelCache.putIfAbsent(cacheKey, shapeModel);
274+
} finally {
275+
cacheLock.unlock();
276+
}
277+
}
278+
}
279+
280+
/**
281+
* Get the shapes model from the cache or from the provided supplier function.
282+
*
283+
* @param specs The current validation settings.
284+
* @param modelSupplierIfNotCached Supplier for the model if it was not found in the cache.
285+
* @return The model to use.
286+
*/
287+
public Model getShapeModel(ValidationSpecs specs, Supplier<Model> modelSupplierIfNotCached) {
288+
Model cachedModel = null;
289+
String cacheKey = toShaclModelCacheKey(specs.getDomainConfig(), specs.getValidationType(), specs.getDomainConfig().getDefaultReportSyntax());
290+
if (specs.isLoadImports()) {
291+
if (specs.isLogProgress() && logger.isDebugEnabled()) {
292+
logger.debug("Cached shape model for [{}] not loaded as we are loading imports", cacheKey);
293+
}
294+
} else {
295+
cacheLock.lock();
296+
try {
297+
Model materialisedModel = materialisedShaclModelCache.get(cacheKey);
298+
if (materialisedModel == null) {
299+
Path modelPath = shaclModelCache.get(cacheKey);
300+
if (modelPath != null) {
301+
try (InputStream dataStream = Files.newInputStream(modelPath)) {
302+
materialisedModel = readModel(dataStream, RDFLanguages.contentTypeToLang(specs.getDomainConfig().getDefaultReportSyntax()), null);
303+
} catch (IOException e) {
304+
throw new ValidatorException("validator.label.exception.errorReadingShaclFile", e);
305+
}
306+
materialisedShaclModelCache.put(cacheKey, materialisedModel);
307+
cachedModel = materialisedModel;
308+
}
309+
} else {
310+
cachedModel = materialisedModel;
311+
}
312+
} finally {
313+
cacheLock.unlock();
314+
}
315+
if (specs.isLogProgress() && logger.isDebugEnabled()) {
316+
if (cachedModel == null) {
317+
logger.debug("Cached shape model for [{}] not found", cacheKey);
318+
} else {
319+
logger.debug("Cached shape model for [{}] found", cacheKey);
320+
}
321+
}
322+
}
323+
cachedModel = Objects.requireNonNullElseGet(cachedModel, () -> {
324+
Model loadedModel = modelSupplierIfNotCached.get();
325+
cacheShapeModelIfPossible(specs, loadedModel);
326+
return loadedModel;
327+
});
328+
return cachedModel;
329+
}
330+
199331
/**
200332
* Write the provided RDF model to the SHACL shape cache.
201333
*

shaclvalidator-common/src/main/java/eu/europa/ec/itb/shacl/validation/SHACLValidator.java

Lines changed: 7 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,6 @@
1515

1616
package eu.europa.ec.itb.shacl.validation;
1717

18-
import com.apicatalog.jsonld.JsonLdOptions;
19-
import com.apicatalog.jsonld.http.DefaultHttpClient;
20-
import com.apicatalog.jsonld.http.media.MediaType;
21-
import com.apicatalog.jsonld.loader.FileLoader;
22-
import com.apicatalog.jsonld.loader.HttpLoader;
23-
import com.apicatalog.jsonld.loader.SchemeRouter;
2418
import com.gitb.tr.BAR;
2519
import com.gitb.tr.TestAssertionReportType;
2620
import com.gitb.vs.ValidateRequest;
@@ -49,11 +43,7 @@
4943
import org.apache.jena.rdf.model.*;
5044
import org.apache.jena.riot.Lang;
5145
import org.apache.jena.riot.RDFLanguages;
52-
import org.apache.jena.riot.RDFParserBuilder;
53-
import org.apache.jena.riot.system.PrefixMap;
54-
import org.apache.jena.riot.system.PrefixMapStd;
5546
import org.apache.jena.shared.JenaException;
56-
import org.apache.jena.sparql.util.Context;
5747
import org.apache.jena.vocabulary.RDF;
5848
import org.slf4j.Logger;
5949
import org.slf4j.LoggerFactory;
@@ -68,7 +58,6 @@
6858

6959
import static eu.europa.ec.itb.shacl.util.ShaclValidatorUtils.*;
7060
import static eu.europa.ec.itb.shacl.validation.SHACLResources.VALIDATION_REPORT;
71-
import static org.apache.jena.riot.lang.LangJSONLD11.JSONLD_OPTIONS;
7261

7362
/**
7463
* Component used to validate RDF content against SHACL shapes.
@@ -190,7 +179,7 @@ private ValidateRequest preparePluginInput(File pluginTmpFolder) {
190179
} else {
191180
// Make a converted copy.
192181
try (FileInputStream in = new FileInputStream(getInputFileToUse()); FileWriter out = new FileWriter(pluginInputFile)) {
193-
Model fileModel = readModel(in, contentSyntaxToUse, null);
182+
Model fileModel = fileManager.readModel(in, contentSyntaxToUse, null);
194183
fileManager.writeRdfModel(out, fileModel, Lang.RDFXML.getContentType().getContentTypeStr());
195184
} catch (IOException e) {
196185
throw new IllegalStateException("Unable to convert input file for plugin", e);
@@ -377,8 +366,9 @@ private Model validateShacl(List<FileInfo> shaclFiles) {
377366
this.dataModel = ModelFactory.createDefaultModel();
378367
this.aggregatedShapes = ModelFactory.createDefaultModel();
379368
} else {
369+
// Construct the shapes graph.
370+
this.aggregatedShapes = fileManager.getShapeModel(specs, () -> getShapesModel(shaclFiles));
380371
// Get data to validate from file
381-
this.aggregatedShapes = getShapesModel(shaclFiles);
382372
this.dataModel = getDataModel(getInputFileToUse(), this.aggregatedShapes);
383373
// Perform the validation of data, using the shapes model. Do not validate any shapes inside the data model.
384374
Resource resource = ValidationUtil.validateModel(dataModel, this.aggregatedShapes, false);
@@ -418,10 +408,10 @@ private Model getShapesModel(List<FileInfo> shaclFiles) {
418408
throw new ValidatorException("validator.label.exception.unableToDetermineShaclContentType");
419409
}
420410
try (InputStream dataStream = new FileInputStream(shaclFile.getFile())) {
421-
aggregateModel.add(readModel(dataStream, rdfLanguage, null));
411+
aggregateModel.add(fileManager.readModel(dataStream, rdfLanguage, null));
422412
} catch (IOException e) {
423413
throw new ValidatorException("validator.label.exception.errorReadingShaclFile", e);
424-
}
414+
}
425415
}
426416
if (this.importedShapes!=null) {
427417
this.importedShapes.close();
@@ -570,41 +560,6 @@ private Lang contextSyntaxToUse() {
570560
return contentSyntaxLang;
571561
}
572562

573-
/**
574-
* Build the RDF model from the provided stream.
575-
*
576-
* @param dataStream The stream to read from.
577-
* @param rdfLanguage The content type of the stream's data.
578-
* @return The parsed model.
579-
*/
580-
private Model readModel(InputStream dataStream, Lang rdfLanguage, Map<String, String> nsPrefixes) {
581-
var builder = RDFParserBuilder
582-
.create()
583-
.lang(rdfLanguage)
584-
.source(dataStream);
585-
if (nsPrefixes != null) {
586-
// Before parsing set the prefixes of the model to avoid mismatches.
587-
PrefixMap prefixes = new PrefixMapStd();
588-
prefixes.putAll(nsPrefixes);
589-
builder = builder.prefixes(prefixes);
590-
}
591-
if (Lang.JSONLD11.equals(rdfLanguage) || Lang.JSONLD.equals(rdfLanguage)) {
592-
var options = new JsonLdOptions();
593-
var httpLoader = new HttpLoader(DefaultHttpClient.defaultInstance());
594-
/*
595-
* Set fallback type for remote contexts to avoid errors for non JSON/JSON-LD Content Types.
596-
* This allows us to proceed if e.g. the Content Type originally returned is "text/plain".
597-
*/
598-
httpLoader.setFallbackContentType(MediaType.JSON);
599-
options.setDocumentLoader(new SchemeRouter()
600-
.set("http", httpLoader)
601-
.set("https", httpLoader)
602-
.set("file", new FileLoader()));
603-
builder = builder.context(Context.create().set(JSONLD_OPTIONS, options));
604-
}
605-
return builder.build().toModel();
606-
}
607-
608563
/**
609564
* Prepare the data graph model for the provided inputs.
610565
*
@@ -616,7 +571,7 @@ private Model getDataModel(File dataFile, Model shapesModel) {
616571
// Upload the data in the Model.
617572
Model dataModel;
618573
try (InputStream dataStream = new FileInputStream(dataFile)) {
619-
dataModel = readModel(dataStream, contextSyntaxToUse(), shapesModel == null ? null : shapesModel.getNsPrefixMap());
574+
dataModel = fileManager.readModel(dataStream, contextSyntaxToUse(), shapesModel == null ? null : shapesModel.getNsPrefixMap());
620575
if (this.specs.isLoadImports()) {
621576
if (this.specs.isLogProgress()) {
622577
LOG.info("Loading imports...");
@@ -659,7 +614,7 @@ private File getInputFileToUse() {
659614
// preprocessing: execute the CONSTRUCT query
660615
Model inputModel;
661616
try (InputStream dataStream = new FileInputStream(specs.getInputFileToValidate())) {
662-
inputModel = readModel(dataStream, contextSyntaxToUse(), null);
617+
inputModel = fileManager.readModel(dataStream, contextSyntaxToUse(), null);
663618
} catch (IOException e) {
664619
throw new ValidatorException("validator.label.exception.errorWhileReadingProvidedContent", e, e.getMessage());
665620
} catch (JenaException e) {

shaclvalidator-common/src/main/resources/application.properties

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,6 @@ validator.identifier = rdf
7272
# Default country detection for statistics reporting
7373
validator.webhook.statisticsEnableCountryDetection = false
7474
# Default http header for the proxied ip
75-
validator.webhook.ipheader = X-Real-IP
75+
validator.webhook.ipheader = X-Real-IP
76+
# Disable warnings from Jena model parsing.
77+
logging.level.org.apache.jena.riot=error

0 commit comments

Comments
 (0)