[WIP] Implement Index Garbage Collection

samply · Nov 11, 2024 · bbcb594 · bbcb594
1 parent 5457c28
commit bbcb594
Show file tree

Hide file tree

Showing 37 changed files with 2,099 additions and 31 deletions.
diff --git a/.github/scripts/admin-api/check-column-family-estimate-num-keys.sh b/.github/scripts/admin-api/check-column-family-estimate-num-keys.sh
@@ -0,0 +1,11 @@
+#!/bin/bash -e
+
+SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
+. "$SCRIPT_DIR/../util.sh"
+
+BASE="http://localhost:8080/fhir"
+COLUMN_FAMILY="$1"
+EXPECTED_ESTIMATE_NUM_KEYS="$2"
+ACTUAL_ESTIMATE_NUM_KEYS=$(curl -s "$BASE/__admin/dbs/index/column-families" | jq ".[] | select(.name == \"$COLUMN_FAMILY\").estimateNumKeys")
+
+test "estimate number of keys" "$ACTUAL_ESTIMATE_NUM_KEYS" "$EXPECTED_ESTIMATE_NUM_KEYS"
diff --git a/.github/scripts/admin-api/run-prune-job.sh b/.github/scripts/admin-api/run-prune-job.sh
@@ -0,0 +1,57 @@
+#!/bin/bash -e
+
+SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
+. "$SCRIPT_DIR/../util.sh"
+
+BASE="http://localhost:8080/fhir"
+
+prune-job() {
+cat <<END
+{
+  "resourceType": "Task",
+  "id": "PruneJobReadyExample",
+  "meta": {
+    "profile": [
+      "https://samply.github.io/blaze/fhir/StructureDefinition/PruneJob"
+    ]
+  },
+  "input": [
+    {
+      "type": {
+        "coding": [
+          {
+            "code": "t",
+            "system": "https://samply.github.io/blaze/fhir/CodeSystem/PruneJobParameter",
+            "display": "T"
+          }
+        ]
+      },
+      "valuePositiveInt": 2000
+    }
+  ],
+  "code": {
+    "coding": [
+      {
+        "code": "prune",
+        "system": "https://samply.github.io/blaze/fhir/CodeSystem/JobType",
+        "display": "Prune the Database"
+      }
+    ]
+  },
+  "status": "ready",
+  "intent": "order",
+  "authoredOn": "2024-10-15T15:01:00.000Z"
+}
+END
+}
+
+RESULT="$(curl -s -H 'Content-Type: application/fhir+json' -H 'Accept: application/fhir+json' -d "$(prune-job)" "$BASE/__admin/Task")"
+test "resource type" "$(echo "$RESULT" | jq -r .resourceType)" "Task"
+test "status" "$(echo "$RESULT" | jq -r .status)" "ready"
+
+sleep 2
+
+ID="$(echo "$RESULT" | jq -r .id)"
+RESULT="$(curl -s -H 'Accept: application/fhir+json' "$BASE/__admin/Task/$ID")"
+test "resource type" "$(echo "$RESULT" | jq -r .resourceType)" "Task"
+test "status" "$(echo "$RESULT" | jq -r .status)" "completed"
diff --git a/.github/scripts/check-referential-integrity-enforced.sh b/.github/scripts/check-referential-integrity-enforced.sh
@@ -3,7 +3,7 @@
 ENFORCED=$(curl -s http://localhost:8080/fhir/metadata | jq -r 'isempty(.rest[].resource[].referencePolicy[] | select(. == "enforced")) | not')
 
 if [ "true" = "$ENFORCED" ]; then
-  echo "✅"
+  echo "✅ Referential Integrity Enforced is enforced"
 else
   echo "Fail 😞"
   exit 1

diff --git a/.github/scripts/install-blazectl.sh b/.github/scripts/install-blazectl.sh
@@ -1,6 +1,6 @@
 #!/bin/bash -e
 
-VERSION="0.16.0"
+VERSION="0.17.0"
 
 curl -sLO "https://github.com/samply/blazectl/releases/download/v$VERSION/blazectl-$VERSION-linux-amd64.tar.gz"
 tar xzf "blazectl-$VERSION-linux-amd64.tar.gz"

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -1187,7 +1187,7 @@ jobs:
       run: docker load --input /tmp/blaze.tar
 
     - name: Run Blaze
-      run: docker run --name blaze -d -e JAVA_TOOL_OPTIONS=-Xmx2g -e DB_BLOCK_CACHE_SIZE=512 -e ENABLE_OPERATION_PATIENT_PURGE=true -p 8080:8080 --read-only --tmpfs /tmp:exec -v blaze-data:/app/data blaze:latest
+      run: docker run --name blaze -d -e JAVA_TOOL_OPTIONS=-Xmx2g -e DB_BLOCK_CACHE_SIZE=512 -e ENABLE_OPERATION_PATIENT_PURGE=true -e ENABLE_ADMIN_API=true -p 8080:8080 --read-only --tmpfs /tmp:exec -v blaze-data:/app/data blaze:latest
 
     - name: Wait for Blaze
       run: .github/scripts/wait-for-url.sh http://localhost:8080/health
@@ -1207,6 +1207,15 @@ jobs:
     - name: Check Total-Number of Resources are 92114
       run: .github/scripts/check-total-number-of-resources.sh 92114
 
+    - name: Check Estimate Number of Keys in the ResourceAsOf Index
+      run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "resource-as-of-index" "92114"
+
+    - name: Check Estimate Number of Keys in the TypeAsOf Index
+      run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "type-as-of-index" "92114"
+
+    - name: Check Estimate Number of Keys in the SystemAsOf Index
+      run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "system-as-of-index" "92114"
+
     - name: Patient Purge all
       run: .github/scripts/patient-purge-all.sh
 
@@ -1219,6 +1228,30 @@ jobs:
     - name: Check Resource Totals
       run: .github/scripts/check-resource-totals-after-patient-purge.sh
 
+    - name: Check Total-Number of Resources are 13358
+      run: .github/scripts/check-total-number-of-resources.sh 13358
+
+    - name: Run Prune Job
+      run: .github/scripts/admin-api/run-prune-job.sh
+
+    - name: Compact Column Family resource-as-of-index
+      run: blazectl --server http://localhost:8080/fhir compact index resource-as-of-index
+
+    - name: Compact Column Family type-as-of-index
+      run: blazectl --server http://localhost:8080/fhir compact index type-as-of-index
+
+    - name: Compact Column Family system-as-of-index
+      run: blazectl --server http://localhost:8080/fhir compact index system-as-of-index
+
+    - name: Check Estimate Number of Keys in the ResourceAsOf Index
+      run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "resource-as-of-index" "13358"
+
+    - name: Check Estimate Number of Keys in the TypeAsOf Index
+      run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "type-as-of-index" "13358"
+
+    - name: Check Estimate Number of Keys in the SystemAsOf Index
+      run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "system-as-of-index" "13358"
+
     - name: Docker Stats
       run: docker stats --no-stream
 

diff --git a/docs/implementation/database.md b/docs/implementation/database.md
@@ -16,16 +16,19 @@ The idea behind an immutable database is, that the whole database content at a p
 
 Database values are not copied entirely from one version to the next. Instead, like in persistent data structures, structural sharing is used. As such each database value can be seen as a complete copy of the database from the outside, but at the inside, the implementation is efficient enough to be feasible nowadays.
 
-**Note:** In contrast, relational databases, which where designed in the 80's, use an update in-place model, because storage was expensive then.
+> [!NOTE]
+> In contrast, relational databases, which were designed in the 80's, use an update in-place model, because storage was expensive then.
 
 A similar technic is [copy-on-write][4], used in many areas of computing like modern filesystems.
 
 Having a database architecture which consists of immutable database values evolving over time, were past values are kept either forever or at least sufficiently long, has the property that reads don't need coordination. Because database values can be referenced by `t`, an arbitrarily number queries, spread over arbitrarily periods of time, can access the same immutable snapshot of the database. In case data is replicated over multiple nodes, queries can even run in parallel, all accessing the same coherent database value.
 
-As one example, in paging of FHIR searchset or history bundles, Blaze simply refers to the database value's `t` of the first page, in order to calculate every remaining page based on the same stable database value.
+As one example, in paging of FHIR searchset or history bundles, Blaze simply refers to the database value's `t` of the first page, in order to calculate every remaining page based on the same immutable database value.
 
 In practise, each FHIR RESTful API read request will obtain the newest known database value and use that for all queries necessary to answer the request.
 
+Database values will no longer be accessible after [pruning](#pruning), because pruning actually removes purged data, either for GDPR regulatory reasons or simply to save space.
+
 ## Logical Data Model
 
 Datomic uses a fact based data model, were each fact is the triple `(entity, attribute, value)` for example `(<patient-id>, birthDate, 2020)`. This model has one big advantage, the minimum change between two database values will be a fact, which is quite small. The disadvantage is, that bigger structures like resources have to be reconstructed from individual facts. In addition to that, because in FHIR, updates are always whole resources, the actual changed facts have to be determined by diffing the old and new resource.
@@ -50,6 +53,8 @@ There are two different sets of indices, ones which depend on the database value
 | TypeStats         | type t    | total, num-changes                        |
 | SystemStats       | t         | total, num-changes                        |
 
+The first three indices `ResourceAsOf`, `TypeAsOf` and `SystemAsOf` put all versions of resources on the timeline. The only difference between those indices is the order of the parts of the key.
+
 #### ResourceAsOf
 
 The `ResourceAsOf` index is the primary index which maps the resource identifier `(type, id)` together with the `t` to the `content-hash` of the resource version. In addition to that, the index contains the number of changes `num-changes` to the resource, the operator `op` of the change leading to the index entry and an optional `purged-at` point in time were the version was purged.
@@ -86,7 +91,7 @@ In addition to direct resource lookup, the `ResourceAsOf` index is used for list
 
 #### TypeAsOf
 
-The `TypeAsOf` index contains the same information as the `ResourceAsOf` index with the difference that the components of the key are ordered `type`,  `t` and `id` instead of `type`, `id` and `t`. The index is used for listing all versions of all resources of a particular type. Such history listings start with the `t` of the database value going into the past. This is done by not only choosing the resource version with the latest `t` less or equal the database values `t` but instead using all older versions. Such versions even include deleted versions because in FHIR it is allowed to bring back a resource to a new life after it was already deleted. The listing is done by simply scanning through the index in reverse. Because the key is ordered by `type`,  `t` and  `id`, the entries will be first ordered by time, newest first, and second by resource identifier.
+The `TypeAsOf` index contains the same information as the `ResourceAsOf` index with the difference that the parts of the key are ordered `type`, `t` and `id` instead of `type`, `id` and `t`. The index is used for listing all versions of all resources of a particular type. Such history listings start with the `t` of the database value going into the past. This is done by not only choosing the resource version with the latest `t` less than or equal to the database value's `t` but instead using all older versions. Such versions even include deleted versions because in FHIR it is allowed to bring back a resource to a new life after it was already deleted. The listing is done by simply scanning through the index in reverse. Because the key is ordered by `type`, `t` and `id`, the entries will be first ordered by time, newest first, and second by resource identifier.
 
 #### SystemAsOf
 
@@ -100,7 +105,8 @@ The `PatientLastChange` index contains all changes to resources in the compartme
 
 The `TxSuccess` index contains the real point in time, as `java.time.Instant`, successful transactions happened. In other words, this index maps each `t` which is just a monotonically increasing number to a real point in time. 
 
-**Note:** Other than XTDB, Blaze is not a bitemporal. That means the time recorded in the history of resources is the transaction time, not a business time. That also means that one can't fix the history, because the history only reflects the transactions happened. 
+> [!NOTE]
+> Unlike XTDB, Blaze is not bitemporal. That means the time recorded in the history of resources is the transaction time, not a business time. That also means that one can't fix the history, because the history only reflects the transactions that happened.
 
 #### TxError
 
@@ -332,7 +338,7 @@ The `delete-history` command is used to delete the history of a resource.
 
 * get all instance history entries
 * add the `t` of the transaction as `purged-at?` to the value of each of the history entries not only in the ResourceAsOf index, but also in the TypeAsOf and SystemAsOf index
-* remove the number of history entries purged from the number of changes of `type` and thw whole system
+* remove the number of history entries purged from the number of changes of `type` and the whole system
 
 ### Patient Purge
 
@@ -345,6 +351,14 @@ The `patient-purge` command is used to remove all current and historical version
 | id         | yes      | string    | patient id                       |
 | check-refs | no       | boolean   | use referential integrity checks |
 
+## History Deletion / Purging
+
+Blaze always keeps a history of all resources. So updating or deleting a resource creates a new version and keeps all previous versions. However, the history can be deleted using the [delete history](#delete-history) command. Also, in case all data of a particular patient including history should be deleted, the [patient purge](#patient-purge) command can be used.
+
+History deletion works in two stages: first, the resource versions are marked as purged, and second, the marked versions are removed during [pruning](#pruning). The two-stage process is necessary because database values still have to be immutable, and actually removing data would violate that immutability. The marks themselves contain the `t` of the transaction that purged the entry.
+
+## Pruning
+
 [1]: <https://www.datomic.com>
 [2]: <https://xtdb.com>
 [3]: <https://en.wikipedia.org/wiki/Persistent_data_structure>

diff --git a/docs/implementation/fhir-data-model.md b/docs/implementation/fhir-data-model.md
@@ -34,7 +34,8 @@ will produce the following Clojure data structure:
  :deceasedBoolean false}
 ```
 
-**Note:** Clojure data structures are explained [here][3]. Clojure uses generic data structures like maps and lists instead of domain specific classes like Java.
+> [!NOTE]
+> Clojure data structures are explained [here][3]. Clojure uses generic data structures like maps and lists instead of domain specific classes like Java.
 
 The Clojure map looks exactly the same as the JSON document. The main difference is, that all keys are converted to Clojure keywords which can be written without quotes and always start with a colon. The parsing process is fully generic like in JavaScript. There is no need to define any domain specific classes like in Java.
 

diff --git a/job-ig/input/fsh/job.fsh b/job-ig/input/fsh/job.fsh
@@ -10,6 +10,7 @@ Title: "Job Type"
 * #async-interaction "Asynchronous Interaction Request"
 * #async-bulk-data "Asynchronous Bulk Data Request"
 * #compact "Compact a Database Column Family"
+* #prune "Prune the Database"
 * #re-index "(Re)Index a Search Parameter"
 
 CodeSystem: JobStatusReason