Skip to content

Commit

Permalink
Implement Searching by List
Browse files Browse the repository at this point in the history
Closes: #159
  • Loading branch information
alexanderkiel committed Jun 13, 2020
1 parent 3d70ebe commit b42f1c6
Show file tree
Hide file tree
Showing 30 changed files with 751 additions and 548 deletions.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ lint-cql:
lint-db:
cd modules/db; clojure -A:clj-kondo --lint src test

lint-db-protocols:
cd modules/db-protocols; clojure -A:clj-kondo --lint src

lint-db-stub:
cd modules/db-stub; clojure -A:clj-kondo --lint src

Expand Down Expand Up @@ -72,7 +75,7 @@ lint-terminology-service:
lint-thread-pool-executor-collector:
cd modules/thread-pool-executor-collector; clojure -A:clj-kondo --lint src

lint: lint-anomaly lint-coll lint-cql lint-db lint-db-stub lint-executor lint-extern-terminology-service lint-fhir-client lint-fhir-path lint-fhir-structure lint-interaction lint-kv lint-module-base lint-openid-auth lint-operations-measure-evaluate-measure lint-rest-api lint-rest-util lint-rocksdb lint-search-param-registry lint-spec lint-structure-definition lint-terminology-service lint-thread-pool-executor-collector
lint: lint-anomaly lint-coll lint-cql lint-db lint-db-protocols lint-db-stub lint-executor lint-extern-terminology-service lint-fhir-client lint-fhir-path lint-fhir-structure lint-interaction lint-kv lint-module-base lint-openid-auth lint-operations-measure-evaluate-measure lint-rest-api lint-rest-util lint-rocksdb lint-search-param-registry lint-spec lint-structure-definition lint-terminology-service lint-thread-pool-executor-collector
clojure -A:clj-kondo --lint src test

modules/cql/cql-test:
Expand Down
10 changes: 10 additions & 0 deletions modules/db-protocols/deps.edn
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{:deps
{}

:aliases
{:clj-kondo
{:extra-deps
{clj-kondo
{:mvn/version "2020.06.12"}}

:main-opts ["-m" "clj-kondo.main"]}}}
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,14 @@

(defprotocol Query
(-execute
[query node snapshot raoi svri csvri t]
[query node snapshot raoi svri csvri t arg1]))
[query node snapshot raoi svri rsvi csvri t]
[query node snapshot raoi svri rsvi csvri t arg1]))


;; Protocol declaring the six operations a search parameter has to provide.
;; NOTE(review): only the signatures are visible here; the per-method remarks
;; below are inferred from method and argument names — confirm against the
;; implementations before relying on them.
(defprotocol SearchParam
;; presumably turns raw search `values` into the compiled form that the
;; `compiled-value(s)` arguments of the other methods expect — TODO confirm
(-compile-values [search-param values])
;; presumably yields the resources of type `tid` matching `compiled-value`
;; as of `t`; takes the iterators `spvi`, `rsvi` and `raoi` — TODO confirm
(-resources [search-param node snapshot spvi rsvi raoi tid compiled-value t])
;; compartment-scoped counterpart working on the `cspvi` iterator
(-compartment-keys [search-param cspvi compartment tid compiled-value])
;; predicate over one resource version identified by `tid`, `id` and `hash`
(-matches? [search-param snapshot tid id hash compiled-values])
;; the two methods below ignore the name of their first argument (`_`) but
;; still dispatch on it like every protocol method
(-compartment-ids [_ resolver resource])
(-index-entries [_ resolver hash resource linked-compartments]))
1 change: 0 additions & 1 deletion modules/db-stub/src/blaze/db/api_stub.clj
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
{:search-param-value-index nil
:resource-value-index nil
:compartment-search-param-value-index nil
:compartment-resource-value-index nil
:compartment-resource-type-index nil
:resource-index nil
:active-search-params nil
Expand Down
9 changes: 4 additions & 5 deletions modules/db/NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,8 @@ In Crux an entity can have multiple documents over time. Every time a new docume
| Name | Key Parts | Value |
|---|---|---|
| SVR | c-hash tid value id hash-prefix | - |
| ResourceValue | tid id hash-prefix c-hash | values |
| RSV | tid id hash-prefix c-hash value | - |
| CSVR | co-c-hash co-res-id sp-c-hash tid value id hash-prefix | - |
| CompartmentResourceValue | co-c-hash co-res-id tid id hash-prefix c-hash value? | value? |
| CompartmentResourceType | co-c-hash co-res-id tid id | - |
| SearchParam | code tid | id |
| ActiveSearchParams | id | - |
Expand All @@ -74,7 +73,7 @@ In Crux an entity can have multiple documents over time. Every time a new docume

We can make hashes in SearchParam indices shorter (4 bytes) because we only need to differentiate between the versions of a resource. The odds of a hash collision are 1 out of 10000 for about 1000 versions. In case of a hash collision we would produce a false positive query hit. So we would return more resources instead of fewer, which is considered fine in FHIR.

### Search-param Value Resource (SVR)
### Search param Value Resource version (SVR)

The key consists of:

Expand All @@ -92,9 +91,9 @@ The key contains the id of the resource for two reasons, first we can skip to th

The SVR index is comparable to the AVET index in Datomic. Search parameters are the equivalent of indexed attributes in Datomic.

### ResourceValue
### Resource version Search param Value (RSV)


The ResourceValue index is comparable to the EAVT index in Datomic, although it's actually more like an ETAV index, which doesn't exist in Datomic.

### Compartment Search-param Value Resource (CSVR)

Expand Down
3 changes: 3 additions & 0 deletions modules/db/deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
{blaze/coll
{:local/root "../coll"}

blaze/db-protocols
{:local/root "../db-protocols"}

blaze/executor
{:local/root "../executor"}

Expand Down
18 changes: 10 additions & 8 deletions modules/db/src/blaze/db/impl/batch_db.clj
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
(set! *warn-on-reflection* true)


(defrecord BatchDb [node snapshot raoi svri cri csvri t]
(defrecord BatchDb [node snapshot raoi svri rsvi cri csvri t]
p/Db

;; ---- Instance-Level Functions --------------------------------------------
Expand Down Expand Up @@ -67,10 +67,10 @@
;; ---- Common Query Functions ----------------------------------------------

(-execute-query [_ query]
(p/-execute query node snapshot raoi svri csvri t))
(p/-execute query node snapshot raoi svri rsvi csvri t))

(-execute-query [_ query arg1]
(p/-execute query node snapshot raoi svri csvri t arg1))
(p/-execute query node snapshot raoi svri rsvi csvri t arg1))



Expand Down Expand Up @@ -152,6 +152,7 @@
(close [_]
(.close ^Closeable raoi)
(.close ^Closeable svri)
(.close ^Closeable rsvi)
(.close ^Closeable cri)
(.close ^Closeable csvri)
(.close ^Closeable snapshot)))
Expand All @@ -163,19 +164,19 @@

(defrecord TypeQuery [tid clauses]
p/Query
(-execute [_ node snapshot raoi svri _ t]
(index/type-query node snapshot svri raoi tid clauses t)))
(-execute [_ node snapshot raoi svri rsvi _ t]
(index/type-query node snapshot svri rsvi raoi tid clauses t)))


(defrecord SystemQuery [clauses]
p/Query
(-execute [_ node snapshot raoi svri _ t]
(index/system-query node snapshot svri raoi clauses t)))
(-execute [_ node snapshot raoi svri rsvi _ t]
(index/system-query node snapshot svri rsvi raoi clauses t)))


(defrecord CompartmentQuery [c-hash tid clauses]
p/Query
(-execute [_ node snapshot raoi _ cspvi t arg1]
(-execute [_ node snapshot raoi _ _ cspvi t arg1]
(let [compartment {:c-hash c-hash :res-id (codec/id-bytes arg1)}]
(index/compartment-query node snapshot cspvi raoi compartment
tid clauses t))))
Expand All @@ -194,6 +195,7 @@
node snapshot
(kv/new-iterator snapshot :resource-as-of-index)
(kv/new-iterator snapshot :search-param-value-index)
(kv/new-iterator snapshot :resource-value-index)
(kv/new-iterator snapshot :compartment-resource-type-index)
(kv/new-iterator snapshot :compartment-search-param-value-index)
t)))
28 changes: 20 additions & 8 deletions modules/db/src/blaze/db/impl/codec.clj
Original file line number Diff line number Diff line change
Expand Up @@ -142,30 +142,33 @@
(defn resource-value-key
{:arglists '([tid id hash c-hash] [tid id hash c-hash value])}
([tid ^bytes id ^bytes hash c-hash]
(-> (ByteBuffer/allocate (+ tid-size (alength id) hash-prefix-size
(-> (ByteBuffer/allocate (+ tid-size 1 (alength id) hash-prefix-size
c-hash-size))
(.putInt tid)
(.put (byte (alength id)))
(.put id)
(.put hash 0 hash-prefix-size)
(.putInt c-hash)
(.array)))
([tid ^bytes id ^bytes hash c-hash ^bytes value]
(-> (ByteBuffer/allocate (+ tid-size (alength id) hash-prefix-size
(-> (ByteBuffer/allocate (+ tid-size 1 (alength id) hash-prefix-size
c-hash-size (alength value)))
(.putInt tid)
(.put (byte (alength id)))
(.put id)
(.put hash 0 hash-prefix-size)
(.putInt c-hash)
(.put value)
(.array))))


;; Scans `v-hashes` in consecutive slots of `v-hash-size` bytes and returns
;; `true` as soon as one slot equals the first `v-hash-size` bytes of `v-hash`.
;; Returns `nil` (not `false`) when no slot matches, because the loop ends in
;; a `when`.
;; NOTE(review): the first slot is compared before any bounds check, so
;; `v-hashes` is presumably expected to hold at least one hash — confirm.
(defn contains-v-hash? [^bytes v-hashes ^bytes v-hash]
(loop [idx 0]
(if (Arrays/equals v-hashes idx (unchecked-add-int idx v-hash-size) v-hash 0 v-hash-size)
true
;; only advance while another full slot fits behind the current one
(when (< idx (unchecked-subtract-int (alength v-hashes) v-hash-size))
(recur (unchecked-add-int idx v-hash-size))))))
(defn decode-resource-value-key
  "Splits the resource value key that starts at the current position of `bb`
  into a tuple of `[prefix value]`.

  The prefix covers the tid, the id length byte, the id itself, the hash
  prefix and the c-hash. The value is everything that remains in the buffer
  after the prefix. Both parts are read with relative gets, so all remaining
  bytes of `bb` are consumed."
  [^ByteBuffer bb]
  (let [;; absolute get: peeks the id length byte without moving the position
        id-size (.get bb (+ (.position bb) tid-size))
        prefix-size (+ tid-size 1 id-size hash-prefix-size c-hash-size)
        prefix (byte-array prefix-size)
        ;; sized before anything is read, while `remaining` still counts the
        ;; whole key
        value (byte-array (- (.remaining bb) prefix-size))]
    (.get bb prefix)
    (.get bb value)
    [prefix value]))



Expand Down Expand Up @@ -515,6 +518,15 @@
(.asBytes (.hashString (Hashing/murmur3_32) ^String value utf-8)))


(defn tid-id
  "Returns a byte array with tid from `type` followed by `id`.

  The `id` is encoded with ISO-8859-1. The buffer is sized from the encoded
  byte array instead of the character count of `id`, because the two differ
  when `id` contains surrogate pairs (a pair is two chars but is replaced by a
  single byte), which would otherwise leave trailing zero bytes in the result."
  [type ^String id]
  (let [^bytes encoded-id (.getBytes id iso-8859-1)
        bb (ByteBuffer/allocate (+ tid-size (alength encoded-id)))]
    (.putInt bb (tid type))
    (.put bb encoded-id)
    (.array bb)))


(defn string
"Returns a lexicographically sortable byte string of the `string` value."
[string]
Expand Down
56 changes: 9 additions & 47 deletions modules/db/src/blaze/db/impl/index.clj
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
[blaze.db.impl.index.resource-as-of :as resource-as-of]
[blaze.db.impl.iterators :as i]
[blaze.db.impl.search-param :as search-param]
[blaze.db.impl.util :as util]
[blaze.db.kv :as kv]
[taoensso.nippy :as nippy])
(:import
[blaze.db.impl.index.resource Hash]
[clojure.lang IReduceInit]
[java.nio ByteBuffer]))
[clojure.lang IReduceInit]))


(set! *warn-on-reflection* true)
Expand Down Expand Up @@ -44,36 +42,6 @@

;; ---- Type-Level Functions ------------------------------------------------

(def ^:private by-id-grouper
"Transducer which groups `[id hash-prefix]` tuples by `id` and concatenates
all hash-prefixes within each group, outputting `[id hash-prefixes]` tuples."
(comp
(partition-by (fn [[_ id]] (ByteBuffer/wrap id)))
(map
(fn group-hash-prefixes [[[_ id hash-prefix] & more]]
[id (cons hash-prefix (map #(nth % 2) more))]))))


(defn- non-deleted-resource [node raoi tid id t]
(when-let [resource (resource-as-of/resource node raoi tid id t)]
(when-not (resource/deleted? resource)
resource)))


(defn- resource-mapper [node raoi tid t]
(mapcat
(fn [[id hash-prefixes]]
(when-let [resource (non-deleted-resource node raoi tid id t)]
[[resource hash-prefixes]]))))


(def ^:private matches-hash-prefixes-filter
(mapcat
(fn [[resource hash-prefixes]]
(when (some #(bytes/starts-with? (resource/hash resource) %) hash-prefixes)
[resource]))))


(defn- other-clauses-filter [snapshot tid clauses]
(if (seq clauses)
(filter
Expand All @@ -88,21 +56,18 @@
identity))


(defn type-query [node snapshot svri raoi tid clauses t]
(defn type-query [node snapshot svri rsvi raoi tid clauses t]
(let [[[search-param values] & other-clauses] clauses]
(coll/eduction
(util/comp
by-id-grouper
(resource-mapper node raoi tid t)
matches-hash-prefixes-filter
(other-clauses-filter snapshot tid other-clauses))
(search-param/keys search-param snapshot svri tid values))))
(other-clauses-filter snapshot tid other-clauses)
(search-param/resources search-param node snapshot svri rsvi raoi tid
values t))))



;; ---- System-Level Functions ------------------------------------------------

(defn system-query [_ _ _ _ _ _]
(defn system-query [_ _ _ _ _ _ _]
;; TODO: implement
[])

Expand Down Expand Up @@ -148,9 +113,6 @@
[node snapshot csvri raoi compartment tid clauses t]
(let [[[search-param values] & other-clauses] clauses]
(coll/eduction
(util/comp
by-id-grouper
(resource-mapper node raoi tid t)
matches-hash-prefixes-filter
(other-clauses-filter snapshot tid other-clauses))
(search-param/compartment-keys search-param csvri compartment tid values))))
(other-clauses-filter snapshot tid other-clauses)
(search-param/compartment-resources node search-param csvri raoi
compartment tid values t))))
4 changes: 2 additions & 2 deletions modules/db/src/blaze/db/impl/index/resource_as_of.clj
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@

(defn hash-state-t
"Returns a triple of `hash`, `state` and `t` of the resource with `tid` and
`id` at or before `t`."
`id` at or before `t` if there is any."
[raoi tid id t]
(with-raoi-kv
raoi (codec/resource-as-of-key tid id t)
Expand All @@ -343,7 +343,7 @@


(defn resource
"Returns a resource with `tid` and `id` at or before `t`."
"Returns a resource with `tid` and `id` at or before `t` if there is any."
[node raoi tid id t]
(with-raoi-kv
raoi (codec/resource-as-of-key tid id t)
Expand Down
Loading

0 comments on commit b42f1c6

Please sign in to comment.