|
| 1 | +include::../shared-settings.adoc[] |
| 2 | +:toclevels: 4 |
| 3 | +include::../header.adoc[] |
| 4 | +:javadoc: http://docs.rdf4j.org/javadoc/latest?org/eclipse/rdf4j/ |
| 5 | + |
| 6 | +:numbered!: |
| 7 | +== The SAIL API |
| 8 | + |
| 9 | +The RDF4J SAIL (Storage And Inference Layer) API is a collection of interfaces designed for low-level transactional access to RDF data. It functions as a decoupling point between specific database implementations and the functional modules (parsers, query engines, end-user API access, etc.) of the RDF4J framework. |
| 10 | + |
| 11 | +Here, we document the design of the API and explain the roles and rationale behind the various interfaces. We also explain how various abstract base classes provided as part of the API can be reused by third-party implementors, in order to make implementing a SAIL-compatible database easier. |
| 12 | + |
| 13 | +WARNING: This document is currently a draft and is incomplete. Feedback and suggestions for change are welcome, either on our https://github.com/eclipse/rdf4j-doc[GitHub repo], or on the https://groups.google.com/forum/#!forum/rdf4j-users[RDF4J Users Group]. |
| 14 | + |
| 15 | +:numbered: |
| 16 | +== Overview |
| 17 | + |
| 18 | +.SAIL Main interfaces |
| 19 | +[[figure-sail-main]] |
| 20 | +[plantuml, diagram-sail-main, png] |
| 21 | +.... |
| 22 | +interface Sail { |
| 23 | + + initialize() |
| 24 | + + getConnection() |
| 25 | + + shutDown() |
| 26 | + + isWritable() |
| 27 | + .. |
| 28 | + + getSupportedIsolationLevels() |
| 29 | + + getDefaultIsolationLevel() |
| 30 | + .. |
| 31 | + + getValueFactory() |
| 32 | + + setDataDir() |
| 33 | + + getDataDir() |
| 34 | +} |
| 35 | +
|
| 36 | +interface SailConnection { |
| 37 | + + isOpen() |
| 38 | + + close() |
| 39 | + .. query .. |
| 40 | + + evaluate() |
| 41 | + + getContextIDs() |
| 42 | + + getNamespace() |
| 43 | + + getNamespaces() |
| 44 | + + getStatements() |
| 45 | + + hasStatement() |
| 46 | + + size() |
| 47 | + .. txn management .. |
| 48 | + + begin() |
| 49 | + + flush() |
| 50 | + + prepare() |
| 51 | + + commit() |
| 52 | + + rollback() |
| 53 | + + isActive() |
| 54 | + + startUpdate() |
| 55 | + + endUpdate() |
| 56 | + .. data modification .. |
| 57 | + + addStatement() |
| 58 | + + removeStatements() |
| 59 | + + clear() |
| 60 | + + setNamespace() |
| 61 | + + removeNamespace() |
| 62 | + + clearNamespaces() |
| 63 | +} |
| 64 | +
|
| 65 | +Sail "1" *- "*" SailConnection |
| 66 | +
|
| 67 | +.... |
| 68 | + |
| 69 | +In diagram <<figure-sail-main>> we see an overview of the two main interfaces: `Sail`, and `SailConnection`. Although SAIL is not a JDBC implementation, its design is inspired by it. |
| 70 | +The `Sail` interface is the main access point for RDF storage. Roughly speaking, this is "the database". Each `Sail` object is composed of zero or more `SailConnection` objects. The `SailConnection` is where all the actual database access functionality is concentrated: it provides methods to execute queries, retrieve and modify triples, and manage transactions. |
| 71 | + |
| 72 | +=== AbstractSail and AbstractSailConnection |
| 73 | + |
| 74 | +RDF4J provides default (abstract) implementations for most of the SAIL functionality, which can be reused (and of course overridden) by any concrete implementation. |
| 75 | + |
| 76 | +.Abstract base implementations |
| 77 | +[[figure-abstract-sail]] |
| 78 | +[plantuml, diagram-abstract-sail, png] |
| 79 | +.... |
| 80 | +interface Sail |
| 81 | +interface SailConnection |
| 82 | +abstract class AbstractSail { |
| 83 | + + setDataDir() |
| 84 | + + getDataDir() |
| 85 | + + initialize() |
| 86 | + + shutDown() |
| 87 | + + getConnection() |
| 88 | + + getSupportedIsolationLevels() |
| 89 | + + getDefaultIsolationLevel() |
| 90 | + + setDefaultIsolationLevel() |
| 91 | + + getIterationCacheSyncThreshold() |
| 92 | + + setIterationCacheSyncThreshold() |
| 93 | + # addSupportedIsolationLevel() |
| 94 | + # removeSupportedIsolationLevel() |
| 95 | + # setSupportedIsolationLevels() |
| 96 | + # isInitialized() |
| 97 | + -- abstract methods -- |
| 98 | + # {abstract} getConnectionInternal() |
| 99 | + # {abstract} shutDownInternal() |
| 100 | + # {abstract} initializeInternal() |
| 101 | +
|
| 102 | +} |
| 103 | +
|
| 104 | +abstract class AbstractSailConnection { |
| 105 | + # updateLock: ReentrantLock |
| 106 | + # connectionLock: ReentrantReadWriteLock |
| 107 | +
|
| 108 | + -- base public method impls -- |
| 109 | + + isOpen() |
| 110 | + + close() |
| 111 | + .. query .. |
| 112 | + + evaluate() |
| 113 | + + getContextIDs() |
| 114 | + + getNamespace() |
| 115 | + + getNamespaces() |
| 116 | + + getStatements() |
| 117 | + + hasStatement() |
| 118 | + + size() |
| 119 | + .. txn management .. |
| 120 | + + begin() |
| 121 | + + flush() |
| 122 | + + prepare() |
| 123 | + + commit() |
| 124 | + + rollback() |
| 125 | + + isActive() |
| 126 | + + startUpdate() |
| 127 | + + endUpdate() |
| 128 | + .. data modification .. |
| 129 | + + addStatement() |
| 130 | + + removeStatements() |
| 131 | + + clear() |
| 132 | + + setNamespace() |
| 133 | + + removeNamespace() |
| 134 | + + clearNamespaces() |
| 135 | +
|
| 136 | + -- abstract methods -- |
| 137 | + # transactionActive() |
| 138 | + # endUpdateInternal() |
| 139 | + # {abstract} closeInternal() |
| 140 | + # {abstract} evaluateInternal() |
| 141 | + # {abstract} getContextIDsInternal() |
| 142 | + # {abstract} getStatementsInternal() |
| 143 | + # {abstract} sizeInternal() |
| 144 | + # {abstract} startTransactionInternal() |
| 145 | + # prepareInternal() |
| 146 | + # {abstract} commitInternal() |
| 147 | + # {abstract} rollbackInternal() |
| 148 | + # {abstract} addStatementInternal() |
| 149 | + # {abstract} removeStatementsInternal() |
| 150 | + # {abstract} clearInternal() |
| 151 | + # {abstract} getNamespacesInternal() |
| 152 | + # {abstract} getNamespaceInternal() |
| 153 | + # {abstract} setNamespaceInternal() |
| 154 | + # {abstract} removeNamespaceInternal() |
| 155 | + # {abstract} clearNamespacesInternal() |
| 156 | + # isActiveOperation() |
| 157 | +} |
| 158 | +
|
| 159 | +Sail <|-- AbstractSail |
| 160 | +SailConnection <|-- AbstractSailConnection |
| 161 | +AbstractSail <- AbstractSailConnection |
| 162 | +.... |
| 163 | + |
| 164 | +The `AbstractSail` class (see diagram <<figure-abstract-sail>>) provides base implementations of all methods of the `Sail` interface. It provides the following benefits to concrete Sail implementations: |
| 165 | + |
| 166 | +. implementations of all required basic getter/setter methods |
| 167 | +. store shutdown management, including grace periods for active connections and eventual forced closure of active connections on store shutdown. |
| 168 | +. thread-safety: takes care of basic concurrency issues around opening multiple connections. |
| 169 | +. ongoing compatibility: future RDF4J releases that introduce new functionality in `Sail` provide default implementations in `AbstractSail`. |
| 170 | + |
| 171 | +Similarly, the `AbstractSailConnection` provides base implementations of all methods of the `SailConnection` interface. It provides the following benefits to concrete SailConnection implementations: |
| 172 | + |
| 173 | +. handles all basic concurrency issues around starting / executing transactions |
| 174 | +. (configurable) buffering of active changes in any transaction |
| 175 | +. ongoing compatibility: future RDF4J releases that introduce new functionality in `SailConnection` provide default implementations in `AbstractSailConnection`. |
| 176 | + |
| 177 | +The abstract base classes use the naming convention ``methodname**Internal**`` to indicate the methods that concrete subclasses should concentrate on implementing. The rationale is that the public method implementations in the abstract class implement basic concurrency handling and other book-keeping, and their corresponding (protected) `...Internal` methods can be implemented by the concrete subclass to provide the actual business logic of the method. |
| 178 | + |
| 179 | +For example, the query method `AbstractSailConnection.getStatements()` provides a lot of book-keeping: it ensures pending updates are flushed, acquires a read lock on the connection, verifies the connection is still open, and takes care of internally registering the resulting `Iteration` from the query for resource management and concurrency purposes. In between all of this, it calls `getStatementsInternal`. The only job of this method is to answer the query by retrieving the relevant data from the data source. |
| 180 | + |
| 181 | +=== NotifyingSail and NotifyingSailConnection |
| 182 | + |
| 183 | +The `NotifyingSail` and `NotifyingSailConnection` interfaces provide basic event handling for SAIL implementations. Their main goal is to provide a messaging mechanism for closely-linked SAIL implementations (for example, a "Sail stack" where a reasoner is to be kept informed of changes to the underlying database). |
| 184 | + |
| 185 | +.NotifyingSail interfaces |
| 186 | +[[figure-notifying-sail]] |
| 187 | +[plantuml, diagram-notifying-sail, png] |
| 188 | +.... |
| 189 | +interface Sail |
| 190 | +interface SailConnection |
| 191 | +interface NotifyingSail { |
| 192 | + + addSailChangedListener(SailChangedListener) |
| 193 | + + removeSailChangedListener(SailChangedListener) |
| 194 | +} |
| 195 | +
|
| 196 | +interface NotifyingSailConnection { |
| 197 | + + addConnectionListener(SailConnectionListener) |
| 198 | + + removeConnectionListener(SailConnectionListener) |
| 199 | +} |
| 200 | +
|
| 201 | +interface SailChangedListener { |
| 202 | + + sailChanged(SailChangedEvent) |
| 203 | +} |
| 204 | +
|
| 205 | +interface SailChangedEvent { |
| 206 | + + Sail getSail() |
| 207 | + + boolean statementsAdded() |
| 208 | + + boolean statementsRemoved() |
| 209 | +} |
| 210 | +
|
| 211 | +interface SailConnectionListener { |
| 212 | + + statementAdded(Statement st) |
| 213 | + + statementRemoved(Statement st) |
| 214 | +} |
| 215 | +
|
| 216 | +Sail <|-- NotifyingSail |
| 217 | +SailConnection <|-- NotifyingSailConnection |
| 218 | +NotifyingSail *-- NotifyingSailConnection |
| 219 | +NotifyingSail - SailChangedListener |
| 220 | +SailChangedListener - SailChangedEvent |
| 221 | +NotifyingSailConnection - SailConnectionListener |
| 222 | +.... |
| 223 | + |
| 224 | +As can be seen in diagram <<figure-notifying-sail>>, the `NotifyingSail` interface provides the option of registering one or more `SailChangedListener` implementations. When registered, the listener will be messaged via the `sailChanged` method. The content of the message is a `SailChangedEvent` that provides basic info on what has been changed. |
| 225 | + |
| 226 | +More fine-grained event data is available at the Connection level. The `NotifyingSailConnection` allows registering a `SailConnectionListener`, which receives a message for each individual statement added or removed on the connection. |
| 227 | + |
| 228 | +=== StackableSail |
| 229 | + |
| 230 | +TODO |
| 231 | + |
| 232 | +== Querying |
| 233 | + |
| 234 | +The SAIL API has no knowledge of SPARQL queries. Instead, it operates on a query algebra, that is, an object representation of a (SPARQL) query as provided by the SPARQL query parser. |
| 235 | + |
| 236 | +`SailConnection` has a single `evaluate()` method, which accepts a `TupleExpr` object. This is the object representation of the query as produced by the query parser. |
| 237 | + |
| 238 | + |
| 239 | +== Transactions |
| 240 | + |
| 241 | +TODO |
| 242 | + |
0 commit comments