Skip to content

Commit

Permalink
[WIP] Implement Index Garbage Collection
Browse files Browse the repository at this point in the history
  • Loading branch information
alexanderkiel committed Oct 28, 2024
1 parent 3aeb70d commit 17f4cfd
Show file tree
Hide file tree
Showing 39 changed files with 2,224 additions and 59 deletions.
11 changes: 11 additions & 0 deletions .github/scripts/admin-api/check-column-family-estimate-num-keys.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash -e

# Compares the estimated number of keys that the admin API reports for one
# column family of the index database with an expected value.
#
# Usage: check-column-family-estimate-num-keys.sh <column-family> <expected-estimate-num-keys>

# Repeat -e here: shebang options are lost when the script is started as
# `bash <script>`.
set -e

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
. "$SCRIPT_DIR/../util.sh"

# Query the locally running server directly.
BASE="http://localhost:8080/fhir"

# Fail early with a clear message when an argument is missing.
COLUMN_FAMILY="${1:?missing argument: column family name}"
EXPECTED_ESTIMATE_NUM_KEYS="${2:?missing argument: expected estimate number of keys}"

# The column family name is handed to jq via --arg so it is always compared as
# a plain string and never interpreted as part of the jq program.
ACTUAL_ESTIMATE_NUM_KEYS=$(curl -s "$BASE/__admin/dbs/index/column-families" | jq --arg name "$COLUMN_FAMILY" '.[] | select(.name == $name).estimateNumKeys')

# NOTE(review): `test` is presumably the helper sourced from util.sh (label,
# actual, expected) rather than the shell builtin - confirm.
test "estimate number of keys" "$ACTUAL_ESTIMATE_NUM_KEYS" "$EXPECTED_ESTIMATE_NUM_KEYS"
59 changes: 59 additions & 0 deletions .github/scripts/admin-api/run-prune-job.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash -e

# Creates a prune job through the admin API and checks that it completes.

# Repeat -e here: shebang options are lost when the script is started as
# `bash <script>`.
set -e

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
. "$SCRIPT_DIR/../util.sh"

# Query the locally running server directly.
BASE="http://localhost:8080/fhir"

# Prints the Task resource (JSON) that describes a prune job in status "ready"
# with the input parameter t = 1000.
#
# The profile and code system URLs are the canonical
# https://samply.github.io/blaze/fhir/... URLs. The heredoc delimiter is quoted
# so the JSON is emitted literally, without any shell expansion.
prune-job() {
cat <<'END'
{
  "resourceType": "Task",
  "id": "PruneJobReadyExample",
  "meta": {
    "profile": [
      "https://samply.github.io/blaze/fhir/StructureDefinition/PruneJob"
    ]
  },
  "input": [
    {
      "type": {
        "coding": [
          {
            "code": "t",
            "system": "https://samply.github.io/blaze/fhir/CodeSystem/PruneJobParameter",
            "display": "T"
          }
        ]
      },
      "valuePositiveInt": 1000
    }
  ],
  "code": {
    "coding": [
      {
        "code": "prune",
        "system": "https://samply.github.io/blaze/fhir/CodeSystem/JobType",
        "display": "Prune the Database"
      }
    ]
  },
  "status": "ready",
  "intent": "order",
  "authoredOn": "2024-10-15T15:01:00.000Z"
}
END
}

# Create the job by POSTing the Task to the admin API; it is expected to be
# returned in status "ready".
RESULT="$(curl -s -H 'Content-Type: application/fhir+json' -H 'Accept: application/fhir+json' -d "$(prune-job)" "$BASE/__admin/Task")"
test "resource type" "$(echo "$RESULT" | jq -r .resourceType)" "Task"
test "status" "$(echo "$RESULT" | jq -r .status)" "ready"

ID="$(echo "$RESULT" | jq -r .id)"

# Poll the job instead of relying on one fixed sleep: wait until it reports
# "completed" or "failed", for at most about 30 seconds. A slow runner then
# no longer produces a spurious failure and a fast one is not held up.
for ((ATTEMPT = 0; ATTEMPT < 30; ATTEMPT++)); do
  sleep 1
  RESULT="$(curl -s -H 'Accept: application/fhir+json' "$BASE/__admin/Task/$ID")"
  JOB_STATUS="$(echo "$RESULT" | jq -r .status)"
  if [[ "$JOB_STATUS" == "completed" || "$JOB_STATUS" == "failed" ]]; then
    break
  fi
done

# The last fetched state of the job has to be a completed Task.
test "resource type" "$(echo "$RESULT" | jq -r .resourceType)" "Task"
test "status" "$(echo "$RESULT" | jq -r .status)" "completed"

# Print the final Task for the job log.
echo "$RESULT" | jq '.'
2 changes: 1 addition & 1 deletion .github/scripts/check-referential-integrity-enforced.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
ENFORCED=$(curl -s http://localhost:8080/fhir/metadata | jq -r 'isempty(.rest[].resource[].referencePolicy[] | select(. == "enforced")) | not')

if [ "true" = "$ENFORCED" ]; then
echo ""
echo " Referential Integrity Enforced is enforced"
else
echo "Fail 😞"
exit 1
Expand Down
24 changes: 24 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,15 @@ jobs:
- name: Check Total-Number of Resources are 92114
run: .github/scripts/check-total-number-of-resources.sh 92114

- name: Check Estimate Number of Keys in the ResourceAsOf Index
run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "resource-as-of-index" "92114"

- name: Check Estimate Number of Keys in the TypeAsOf Index
run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "type-as-of-index" "92114"

- name: Check Estimate Number of Keys in the SystemAsOf Index
run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "system-as-of-index" "92114"

- name: Patient Purge all
run: .github/scripts/patient-purge-all.sh

Expand All @@ -1216,6 +1225,21 @@ jobs:
- name: Check Resource Totals
run: .github/scripts/check-resource-totals-after-patient-purge.sh

- name: Run Prune Job
run: .github/scripts/admin-api/run-prune-job.sh

# The step name states the same total that is passed to the check script.
- name: Check Total-Number of Resources are 13358
  run: .github/scripts/check-total-number-of-resources.sh 13358

- name: Check Estimate Number of Keys in the ResourceAsOf Index
run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "resource-as-of-index" "13358"

- name: Check Estimate Number of Keys in the TypeAsOf Index
run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "type-as-of-index" "13358"

- name: Check Estimate Number of Keys in the SystemAsOf Index
run: .github/scripts/admin-api/check-column-family-estimate-num-keys.sh "system-as-of-index" "13358"

- name: Docker Stats
run: docker stats --no-stream

Expand Down
1 change: 1 addition & 0 deletions job-ig/input/fsh/job.fsh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Id: JobType
Title: "Job Type"
* ^status = #active
* #re-index "(Re)Index a Search Parameter"
* #prune "Prune the Database"
* #compact "Compact Database Column Families"
* #async-interaction "Asynchronous Interaction Request"
* #async-bulk-data "Asynchronous Bulk Data Request"
Expand Down
149 changes: 149 additions & 0 deletions job-ig/input/fsh/prune-job.fsh
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
Alias: UCUM = http://unitsofmeasure.org
Alias: $FT = http://hl7.org/fhir/fhir-types
Alias: $JT = https://samply.github.io/blaze/fhir/CodeSystem/JobType
Alias: $JSR = https://samply.github.io/blaze/fhir/CodeSystem/JobStatusReason
Alias: $JO = https://samply.github.io/blaze/fhir/CodeSystem/JobOutput
Alias: $PJP = https://samply.github.io/blaze/fhir/CodeSystem/PruneJobParameter
Alias: $PJO = https://samply.github.io/blaze/fhir/CodeSystem/PruneJobOutput
Alias: $PI = https://samply.github.io/blaze/fhir/CodeSystem/PruneIndices

// Codes identifying the input parameters of a prune job. The single code `t`
// is used as the type of the `t` input slice of the PruneJob profile.
CodeSystem: PruneJobParameter
Id: PruneJobParameter
Title: "Prune Job Parameter"
* ^status = #active
* #t "T"

// Codes identifying the outputs of a prune job. Each code is used as the type
// of the correspondingly named output slice of the PruneJob profile.
CodeSystem: PruneJobOutput
Id: PruneJobOutput
Title: "Prune Job Output"
* ^status = #active
* #total-index-entries "Total Index Entries"
* #index-entries-processed "Index Entries Processed"
* #index-entries-deleted "Index Entries Deleted"
* #processing-duration "Processing Duration"
* #next-index "Next Index"
* #next-type "Next Type"
* #next-id "Next Id"
* #next-t "Next T"

// Codes naming the indices that can appear as the "next index" output of a
// prune job (see the nextIndex output slice of the PruneJob profile).
CodeSystem: PruneIndices
Id: PruneIndices
Title: "Prune Indices"
* ^status = #active
* #resource-as-of "ResourceAsOf"
* #type-as-of "TypeAsOf"
* #system-as-of "SystemAsOf"

// Value set containing all codes of the PruneIndices code system. The
// nextIndex output of the PruneJob profile is bound to it.
ValueSet: PruneIndices
Id: PruneIndices
Title: "Prune Indices Value Set"
* ^status = #active
* include codes from system PruneIndices

// Profile of a job that prunes the database.
//
// Input:  exactly one `t` parameter (positiveInt), the database point in time
//         to use as start of pruning.
// Output: optional progress figures (total/processed/deleted index entries and
//         the processing duration in seconds) plus the optional
//         nextIndex/nextType/nextId/nextT values that describe where to
//         continue when the job is resumed after pausing or shutdown.
Profile: PruneJob
Parent: Job
* code = $JT#prune "Prune the Database"

// Inputs are sliced by the pattern of their type; other inputs stay allowed.
* input ^slicing.discriminator.type = #pattern
* input ^slicing.discriminator.path = "type"
* input ^slicing.rules = #open
* input contains t 1..1
* input[t] ^short = "T"
* input[t] ^definition = "The database point in time to use as start of pruning."
* input[t].type = $PJP#t
* input[t].value[x] only positiveInt

// Outputs are sliced the same way; every slice below is optional.
* output ^slicing.discriminator.type = #pattern
* output ^slicing.discriminator.path = "type"
* output ^slicing.rules = #open
* output contains totalIndexEntries 0..1
* output[totalIndexEntries] ^short = "Total Index Entries"
* output[totalIndexEntries] ^definition = "Estimated total number of index entries to prune."
* output[totalIndexEntries].type = $PJO#total-index-entries
* output[totalIndexEntries].value[x] only unsignedInt
* output contains indexEntriesProcessed 0..1
* output[indexEntriesProcessed] ^short = "Index Entries Processed"
* output[indexEntriesProcessed] ^definition = "Number of index entries processed."
* output[indexEntriesProcessed].type = $PJO#index-entries-processed
* output[indexEntriesProcessed].value[x] only unsignedInt
* output contains indexEntriesDeleted 0..1
* output[indexEntriesDeleted] ^short = "Index Entries Deleted"
* output[indexEntriesDeleted] ^definition = "Number of index entries deleted."
* output[indexEntriesDeleted].type = $PJO#index-entries-deleted
* output[indexEntriesDeleted].value[x] only unsignedInt
* output contains processingDuration 0..1
* output[processingDuration] ^short = "Processing Duration"
* output[processingDuration] ^definition = "Duration the pruning processing took. Durations while the job was paused don't count."
* output[processingDuration].type = $PJO#processing-duration
* output[processingDuration].value[x] only Quantity
// The indented rules apply to valueQuantity (path context of the rule above):
// the duration has to be a UCUM quantity with the unit code `s`.
* output[processingDuration].valueQuantity
  * system 1..1
  * system = UCUM
  * code 1..1
  * code = #s "seconds"

// Resume markers.
* output contains nextIndex 0..1
* output[nextIndex] ^short = "Next Index"
* output[nextIndex] ^definition = "The name of the index to continue with. Used in case the job is resumed after manual pausing or shutdown of Blaze."
* output[nextIndex].type = $PJO#next-index
* output[nextIndex].value[x] only code
* output[nextIndex].valueCode from PruneIndices
* output contains nextType 0..1
* output[nextType] ^short = "Next Type"
* output[nextType] ^definition = "The FHIR resource type to continue with. Used in case the job is resumed after manual pausing or shutdown of Blaze."
* output[nextType].type = $PJO#next-type
* output[nextType].value[x] only code
* output[nextType].valueCode from http://hl7.org/fhir/ValueSet/resource-types
* output contains nextId 0..1
* output[nextId] ^short = "Next Id"
* output[nextId] ^definition = "The FHIR resource id to continue with. Used in case the job is resumed after manual pausing or shutdown of Blaze."
* output[nextId].type = $PJO#next-id
* output[nextId].value[x] only id
* output contains nextT 0..1
* output[nextT] ^short = "Next T"
* output[nextT] ^definition = "The database point in time to continue with. Used in case the job is resumed after manual pausing or shutdown of Blaze."
* output[nextT].type = $PJO#next-t
* output[nextT].value[x] only positiveInt

// Example of a prune job in status `ready` with the input t = 42 and no
// outputs yet.
Instance: PruneJobReadyExample
InstanceOf: PruneJob
* status = #ready
* intent = #order
* code = $JT#prune "Prune the Database"
* authoredOn = "2024-10-15T15:01:00.000Z"
* input[t].type = $PJP#t "T"
* input[t].valuePositiveInt = 42

// Example of a prune job in status `in-progress` (status reason `started`)
// that carries all progress outputs and the next-index/type/id/t values
// defined by the PruneJob profile.
Instance: PruneJobInProgressExample
InstanceOf: PruneJob
* status = #in-progress
* statusReason = $JSR#started "Started"
* intent = #order
* code = $JT#prune "Prune the Database"
* authoredOn = "2024-10-15T15:01:00.000Z"
* input[t].type = $PJP#t "T"
* input[t].valuePositiveInt = 42
* output[totalIndexEntries].type = $PJO#total-index-entries "Total Index Entries"
* output[totalIndexEntries].valueUnsignedInt = 1000
* output[indexEntriesProcessed].type = $PJO#index-entries-processed "Index Entries Processed"
* output[indexEntriesProcessed].valueUnsignedInt = 100
* output[indexEntriesDeleted].type = $PJO#index-entries-deleted "Index Entries Deleted"
* output[indexEntriesDeleted].valueUnsignedInt = 10
* output[processingDuration].type = $PJO#processing-duration "Processing Duration"
// Only the numeric value is given here; the profile fixes the UCUM system and
// the unit code `s` for this quantity.
* output[processingDuration].valueQuantity.value = 10
* output[nextIndex].type = $PJO#next-index "Next Index"
* output[nextIndex].valueCode = $PI#resource-as-of
* output[nextType].type = $PJO#next-type "Next Type"
* output[nextType].valueCode = $FT#Patient
* output[nextId].type = $PJO#next-id "Next Id"
* output[nextId].valueId = "0"
* output[nextT].type = $PJO#next-t "Next T"
* output[nextT].valuePositiveInt = 23

// Example of a prune job in status `failed` that reports an error message.
// NOTE(review): the `error` output slice is not declared in the PruneJob
// profile itself; presumably it is inherited from the parent Job profile -
// confirm.
Instance: PruneJobFailedExample
InstanceOf: PruneJob
* status = #failed
* intent = #order
* code = $JT#prune "Prune the Database"
* authoredOn = "2024-10-15T15:01:00.000Z"
* input[t].type = $PJP#t "T"
* input[t].valuePositiveInt = 42
* output[error].type = $JO#error "Error"
* output[error].valueString = "error message"
Loading

0 comments on commit 17f4cfd

Please sign in to comment.