Skip to content

Commit

Permalink
Create $compaction Operation
Browse files Browse the repository at this point in the history
Closes: #208
  • Loading branch information
alexanderkiel committed Nov 8, 2024
1 parent 529987e commit 71e26dd
Show file tree
Hide file tree
Showing 72 changed files with 2,366 additions and 689 deletions.
2 changes: 2 additions & 0 deletions .clj-kondo/root/config.edn
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
blaze.fhir.spec.references fsr
blaze.fhir.structure-definition-repo sdr
blaze.job-scheduler js
blaze.job.async-interaction job-async
blaze.job.compact job-compact
blaze.middleware.fhir.db db
blaze.rest-api.header header
blaze.scheduler sched
Expand Down
22 changes: 22 additions & 0 deletions .github/scripts/compact-all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash -e
#
# Runs compact.sh once for every database/column-family pair listed below,
# in order, stopping at the first invocation that fails.

# Options on the shebang line are ignored when the script is started as
# `bash compact-all.sh`, so enable errexit explicitly as well.
set -e

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# One "<database> <column-family>" pair per entry, separated by a single space.
TARGETS=(
  "index search-param-value-index"
  "index resource-value-index"
  "index compartment-search-param-value-index"
  "index compartment-resource-type-index"
  "index active-search-params"
  "index tx-success-index"
  "index tx-error-index"
  "index t-by-instant-index"
  "index resource-as-of-index"
  "index type-as-of-index"
  "index system-as-of-index"
  "index patient-last-change-index"
  "index type-stats-index"
  "index system-stats-index"
  "index cql-bloom-filter"
  "index cql-bloom-filter-by-t"
  "transaction default"
  "resource default"
)

for target in "${TARGETS[@]}"; do
  # Text before the space is the database, text after it the column family.
  "$SCRIPT_DIR/compact.sh" "${target%% *}" "${target#* }"
done
47 changes: 47 additions & 0 deletions .github/scripts/compact-unknown-database.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash -e
#
# Sends a $compact request naming a database that does not exist and checks
# that the asynchronous response reports it as an error.

# Options on the shebang line are ignored when the script is started as
# `bash compact-unknown-database.sh`, so enable errexit explicitly as well.
set -e

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
# NOTE(review): presumably defines the `test` assertion helper called at the
# end of this script - util.sh is not visible here; confirm.
. "$SCRIPT_DIR/util.sh"

# Script start in seconds since the epoch; eclipsed() measures from here.
START_EPOCH="$(date +"%s")"

# Prints the number of whole seconds that have passed since START_EPOCH.
#
# Globals:   START_EPOCH (read) - reference time in seconds since the epoch
# Outputs:   the elapsed seconds on stdout
eclipsed() {
  # Keep the timestamp local so a direct call neither creates nor overwrites
  # a global EPOCH variable.
  local now
  now="$(date +"%s")"
  echo $((now - START_EPOCH))
}

# Base URL that all requests of this script are sent to.
BASE='http://localhost:8080/fhir'

# Prints the Parameters resource used as request body: database `foo` and
# column family `bar`.
#
# Outputs: the JSON document on stdout, one element per line
parameters() {
  printf '%s\n' \
    '{' \
    '"resourceType": "Parameters",' \
    '"parameter": [' \
    '{' \
    '"name": "database",' \
    '"valueCode": "foo"' \
    '},' \
    '{' \
    '"name": "column-family",' \
    '"valueCode": "bar"' \
    '}' \
    ']' \
    '}'
}

# Send the $compact request; the response body is discarded and only the
# response headers are kept.
HEADERS="$(
  curl -s -f \
    -H 'Accept: application/fhir+json' \
    -H 'Content-Type: application/fhir+json' \
    -d "$(parameters)" \
    -o /dev/null -D - \
    "$BASE/\$compact"
)"
# The value of the Content-Location header is the URL polled below.
STATUS_URL="$(grep -i content-location <<<"$HEADERS" | tr -d '\r' | cut -d: -f2- | xargs)"

# wait for response available: poll once per second until the status URL
# answers 200 or 120 seconds have passed since the script started
until [[ ($(eclipsed) -ge 120) || ("$(curl -s -o /dev/null -w '%{response_code}' "$STATUS_URL")" == "200") ]]; do
  sleep 1
done

BUNDLE="$(curl -s -H 'Accept: application/fhir+json' "$STATUS_URL")"

# Prints the raw value that the jq filter $1 selects from the response bundle.
bundle_field() {
  jq -r "$1" <<<"$BUNDLE"
}

test "bundle type" "$(bundle_field '.type')" "batch-response"
test "response status" "$(bundle_field '.entry[0].response.status')" "400"
test "response severity" "$(bundle_field '.entry[0].response.outcome.issue[0].severity')" "error"
test "response code" "$(bundle_field '.entry[0].response.outcome.issue[0].code')" "invalid"
test "response diagnostics" "$(bundle_field '.entry[0].response.outcome.issue[0].diagnostics')" "Unknown database \`foo\`."
93 changes: 93 additions & 0 deletions .github/scripts/compact.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/bin/bash -e
#
# Runs the $compact operation for one column family and checks the job that
# results from it.
#
# Usage: compact.sh <database> <column-family>

# Options on the shebang line are ignored when the script is started as
# `bash compact.sh ...`, so enable errexit explicitly as well.
set -e

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
# NOTE(review): presumably defines the `test` assertion helper used throughout
# this script - util.sh is not visible here; confirm.
. "$SCRIPT_DIR/util.sh"

# The script later calls `$DATE_CMD -d <iso-date>`; on macOS that is done
# through `gdate` instead of the system `date`.
if [[ "$OSTYPE" == "darwin"* ]]; then
  DATE_CMD="gdate"
else
  DATE_CMD="date"
fi

# Script start in seconds since the epoch; eclipsed() measures from here.
START_EPOCH="$(date +"%s")"

# Prints the number of whole seconds that have passed since START_EPOCH.
#
# Globals:   START_EPOCH (read) - reference time in seconds since the epoch
# Outputs:   the elapsed seconds on stdout
eclipsed() {
  # Keep the timestamp local so a direct call neither creates nor overwrites
  # a global EPOCH variable.
  local now
  now="$(date +"%s")"
  echo $((now - START_EPOCH))
}

# Base URL that all requests of this script are sent to.
BASE='http://localhost:8080/fhir'

# Positional arguments: first the database, then the column family.
DATABASE="${1}"
COLUMN_FAMILY="${2}"

# Prints the Parameters resource used as request body of the $compact call.
#
# Globals:   DATABASE (read)      - inserted verbatim as the `database` code
#            COLUMN_FAMILY (read) - inserted verbatim as the `column-family` code
# Outputs:   the JSON document on stdout, one element per line
parameters() {
  printf '%s\n' \
    '{' \
    '"resourceType": "Parameters",' \
    '"parameter": [' \
    '{' \
    '"name": "database",' \
    "\"valueCode\": \"$DATABASE\"" \
    '},' \
    '{' \
    '"name": "column-family",' \
    "\"valueCode\": \"$COLUMN_FAMILY\"" \
    '}' \
    ']' \
    '}'
}

# Taken before the request is sent; reference point for the authoredOn check
# further down.
NOW_EPOCH_SECONDS=$($DATE_CMD +%s)
# Send the $compact request; the response body is discarded and only the
# response headers are kept.
HEADERS="$(curl -sfH 'Accept: application/fhir+json' -H 'Content-Type: application/fhir+json' -d "$(parameters)" -o /dev/null -D - "$BASE/\$compact")"
# The value of the Content-Location header is the URL polled below.
STATUS_URL=$(echo "$HEADERS" | grep -i content-location | tr -d '\r' | cut -d: -f2- | xargs)

# wait for response available: poll once per second until the status URL
# answers 200 or 120 seconds have passed since the script started
while [[ ($(eclipsed) -lt 120) && ("$(curl -s -o /dev/null -w '%{response_code}' "$STATUS_URL")" != "200") ]]; do
  sleep 1
done

# The sixth `/`-separated field of the status URL is used as the job id.
JOB_ID=$(echo "$STATUS_URL" | cut -d '/' -f6)
JOB=$(curl -s -H 'Accept: application/fhir+json' "$BASE/__admin/Task/$JOB_ID")

test "profile URL" "$(echo "$JOB" | jq -r '.meta.profile[]')" "https://samply.github.io/blaze/fhir/StructureDefinition/CompactJob"
test "status" "$(echo "$JOB" | jq -r '.status')" "completed"

AUTHORED_ON_ISO=$(echo "$JOB" | jq -r '.authoredOn')
# A missing authoredOn leaves jq printing "null", which date cannot parse.
# Treat a parse failure as a failed check so the message below is printed,
# instead of letting errexit abort on the date command.
AUTHORED_ON_EPOCH_SECONDS=$($DATE_CMD -d "$AUTHORED_ON_ISO" +%s) || AUTHORED_ON_EPOCH_SECONDS=""
# Require authoredOn to lie within 10 seconds of the request time in BOTH
# directions. The one-sided `NOW - authoredOn < 10` accepted every timestamp
# later than NOW_EPOCH_SECONDS, however far in the future.
if [[ -n "$AUTHORED_ON_EPOCH_SECONDS" ]] &&
  ((AUTHORED_ON_EPOCH_SECONDS - NOW_EPOCH_SECONDS > -10)) &&
  ((AUTHORED_ON_EPOCH_SECONDS - NOW_EPOCH_SECONDS < 10)); then
  echo "✅ the authoredOn dateTime is set and current"
else
  echo "🆘 the authoredOn dateTime is $AUTHORED_ON_ISO, but should be a current dateTime"
  exit 1
fi

# Code system of the job input types; read by input_expr.
PARAMETER_URI="https://samply.github.io/blaze/fhir/CodeSystem/CompactJobParameter"

# Prints a jq filter selecting the job inputs whose type has a coding with
# system PARAMETER_URI and the given code.
#
# Globals:   PARAMETER_URI (read)
# Arguments: $1 - code to match
# Outputs:   the jq filter on stdout
input_expr() {
  local code="$1"
  printf '%s\n' ".input[] | select(.type.coding[] | select(.system == \"$PARAMETER_URI\" and .code == \"$code\"))"
}

# The job inputs must carry the database and column family that were
# passed to this script.
test "database" "$(jq -r "$(input_expr "database") | .valueCode" <<<"$JOB")" "$DATABASE"
test "column-family" "$(jq -r "$(input_expr "column-family") | .valueCode" <<<"$JOB")" "$COLUMN_FAMILY"

# Code system of the job output types; read by output_expr.
OUTPUT_URI='https://samply.github.io/blaze/fhir/CodeSystem/CompactJobOutput'

# Prints a jq filter selecting the job outputs whose type has a coding with
# system OUTPUT_URI and the given code.
#
# Globals:   OUTPUT_URI (read)
# Arguments: $1 - code to match
# Outputs:   the jq filter on stdout
output_expr() {
  local code="$1"
  printf '%s\n' ".output[] | select(.type.coding[] | select(.system == \"$OUTPUT_URI\" and .code == \"$code\"))"
}

# The processing-duration output must be a quantity whose unit is the code
# `s` from the system http://unitsofmeasure.org.
PROCESSING_DURATION="$(jq "$(output_expr "processing-duration") | .valueQuantity" <<<"$JOB")"
test "processing-duration unit system" "$(jq -r .system <<<"$PROCESSING_DURATION")" "http://unitsofmeasure.org"
test "processing-duration unit code" "$(jq -r .code <<<"$PROCESSING_DURATION")" "s"

# History
JOB_HISTORY="$(curl -s -H 'Accept: application/fhir+json' "$BASE/__admin/Task/$JOB_ID/_history")"

test "history resource type" "$(jq -r '.resourceType' <<<"$JOB_HISTORY")" "Bundle"
test "history bundle type" "$(jq -r '.type' <<<"$JOB_HISTORY")" "history"
test "history total" "$(jq -r '.total' <<<"$JOB_HISTORY")" "3"

# Expected status of history entries 0, 1 and 2, in that order.
EXPECTED_HISTORY_STATUSES=(completed in-progress ready)
for i in "${!EXPECTED_HISTORY_STATUSES[@]}"; do
  test "history $i status" "$(jq -r ".entry[$i].resource.status" <<<"$JOB_HISTORY")" "${EXPECTED_HISTORY_STATUSES[$i]}"
done
11 changes: 10 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,7 @@ jobs:
run: docker load --input /tmp/blaze.tar

- name: Run Blaze
run: docker run --name blaze -d -e JAVA_TOOL_OPTIONS=-Xmx2g -e DB_BLOCK_CACHE_SIZE=2048 -p 8080:8080 --read-only --tmpfs /tmp:exec -v blaze-data:/app/data blaze:latest
run: docker run --name blaze -d -e JAVA_TOOL_OPTIONS=-Xmx2g -e DB_BLOCK_CACHE_SIZE=2048 -e ENABLE_ADMIN_API=true -p 8080:8080 --read-only --tmpfs /tmp:exec -v blaze-data:/app/data blaze:latest

- name: Wait for Blaze
run: .github/scripts/wait-for-url.sh http://localhost:8080/health
Expand All @@ -1142,6 +1142,9 @@ jobs:
- name: Load Data
run: blazectl --no-progress --server http://localhost:8080/fhir upload test-data-synthea-1000

- name: Compact All Column Families
run: .github/scripts/compact-all.sh

- name: Check Total-Number of Resources are 1099594
run: .github/scripts/check-total-number-of-resources.sh 1099594

Expand Down Expand Up @@ -1298,6 +1301,12 @@ jobs:
- name: Create Async Job
run: .github/scripts/admin-api/async-job.sh

- name: Load Data
run: blazectl --no-progress --server http://localhost:8080/fhir upload .github/test-data/synthea

- name: Compact Unknown Database
run: .github/scripts/compact-unknown-database.sh

# This test uploads many small transactions in order to show that Blaze can handle many small requests containing
# chunked payload blazectl uses. Versions of Blaze from 0.13.2 to 0.15.3, starting with the migration to Jetty, had
# the problem that the JSON parser did not read the entire inputstream so that terminal chunks could remain, which
Expand Down
12 changes: 9 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,19 @@ clean: $(MODULES) clean-root
build-frontend:
$(MAKE) -C modules/frontend build

build-frontend-image: build-frontend
docker build -t blaze-frontend:latest modules/frontend

build-ingress:
$(MAKE) -C modules/ingress all

uberjar: prep
clojure -T:build uber

build-all: uberjar build-frontend build-ingress
build-image: uberjar
docker build -t blaze:latest .

build-all: build-image build-frontend-image build-ingress

outdated:
clojure -M:outdated
Expand All @@ -62,5 +68,5 @@ cloc-test-root:
cloc-test: $(MODULES) cloc-test-root

.PHONY: $(MODULES) lint-root lint prep test-root test test-coverage clean-root \
clean build-frontend build-ingress uberjar build-all outdated deps-tree \
deps-list emacs-repl cloc-prod cloc-test
clean build-frontend build-frontend-image build-ingress uberjar build-all \
outdated deps-tree deps-list emacs-repl cloc-prod cloc-test
3 changes: 3 additions & 0 deletions deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
blaze/interaction
{:local/root "modules/interaction"}

blaze.operation/compact
{:local/root "modules/operation-compact"}

blaze.operation/graphql
{:local/root "modules/operation-graphql"}

Expand Down
1 change: 1 addition & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ The return content is a Bundle with type set to `history` containing the version

The following Operations are implemented:

* [$compact](api/operation-compact.md)
* [$graphql](http://hl7.org/fhir/resource-operation-graphql.html)
* [Measure $evaluate-measure](api/operation-measure-evaluate-measure.md)
* [Patient $everything](api/operation-patient-everything.md)
Expand Down
6 changes: 6 additions & 0 deletions docs/api/admin.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Admin API

> [!NOTE]
> To enable the Admin API, set the environment variable `ENABLE_ADMIN_API` to true. By default, this API is disabled for security.
> [!CAUTION]
> The Admin API exposes sensitive system information about your Blaze environment. Always enable [authentication](../authentication.md) when using the Admin API to prevent unauthorized access.
## OpenAPI Spec

```sh
Expand Down
57 changes: 57 additions & 0 deletions docs/api/operation-compact.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Operation \$compact

> [!NOTE]
> The system level \$compact operation is only activated if the [Admin API](./admin.md) is activated.
The system level \$compact operation is used to compact column families of RocksDB databases. RocksDB compaction is done automatically, but in certain situations manual compaction can be useful.

```
POST [base]/$compact
```

## Parameters

| Use | Name | Cardinality | Type | Documentation |
|-----|---------------|-------------|------|------------------------------------------------------------------------|
| IN | database | 1..1 | code | One of three possible databases: `index`, `transaction` and `resource` |
| IN | column-family | 1..1 | code | Name of the column family depending on the database. See below. |

### Column Families

| Database | Column Family |
|-------------|--------------------------------------|
| index | search-param-value-index |
| index | resource-value-index |
| index | compartment-search-param-value-index |
| index | compartment-resource-type-index |
| index | active-search-params |
| index | tx-success-index |
| index | tx-error-index |
| index | t-by-instant-index |
| index | resource-as-of-index |
| index | type-as-of-index |
| index | system-as-of-index |
| index | patient-last-change-index |
| index | type-stats-index |
| index | system-stats-index |
| index | cql-bloom-filter |
| index | cql-bloom-filter-by-t |
| transaction | default |
| resource | default |

### Response

The response will always be asynchronous, according to the [Asynchronous Interaction Request Pattern][2] from FHIR R5.

## Using blazectl

The \$compact operation can be executed using [blazectl][1].

### Example

```sh
blazectl --server http://localhost:8080/fhir compact index resource-as-of-index
```

[1]: <https://github.com/samply/blazectl>
[2]: <http://hl7.org/fhir/R5/async-bundle.html>
5 changes: 4 additions & 1 deletion job-ig/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,7 @@ validator_cli.jar:
validate: validator_cli.jar
java -jar validator_cli.jar -version 4.0.1 -level error -debug -ig fsh-generated/resources fsh-generated/resources/Task-*.json fsh-generated/resources/Bundle-*.json

.PHONY: install build validate
clean:
rm -r fsh-generated

.PHONY: install build validate clean
27 changes: 27 additions & 0 deletions job-ig/input/fsh/column-family.fsh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Code system enumerating column family names: `default` plus sixteen named
// index column families.
CodeSystem: ColumnFamily
Id: ColumnFamily
Title: "Column Family"
* ^status = #active
* #default
* #search-param-value-index
* #resource-value-index
* #compartment-search-param-value-index
* #compartment-resource-type-index
* #active-search-params
* #tx-success-index
* #tx-error-index
* #t-by-instant-index
* #resource-as-of-index
* #type-as-of-index
* #system-as-of-index
* #patient-last-change-index
* #type-stats-index
* #system-stats-index
* #cql-bloom-filter
* #cql-bloom-filter-by-t

// Value set that includes every code of the ColumnFamily code system.
ValueSet: ColumnFamily
Id: ColumnFamily
Title: "Column Family Value Set"
* ^status = #active
* include codes from system ColumnFamily
Loading

0 comments on commit 71e26dd

Please sign in to comment.