Skip to content

Commit

Permalink
Built/tested testapp to be sure optimizations don't break functionality.
Browse files Browse the repository at this point in the history
  • Loading branch information
cnuernber committed Jul 26, 2021
1 parent e8d7c9e commit fe15ce0
Show file tree
Hide file tree
Showing 13 changed files with 337 additions and 67 deletions.
2 changes: 1 addition & 1 deletion deps.edn
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{:paths ["src"]
{:paths ["src" "resources"]
;;We serialize datasets to transit-json
:deps {com.cognitect/transit-cljs {:mvn/version "0.8.269"}
techascent/tech.ml.dataset {:mvn/version "6.006"}
Expand Down
1 change: 1 addition & 0 deletions src/deps.cljs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{:npm-deps {"base64-js" "^1.5.0"}}
90 changes: 47 additions & 43 deletions src/tech/v3/dataset.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
[tech.v3.datatype.arrays :as arrays]
[tech.v3.datatype.datetime :as dtype-dt]
[tech.v3.dataset.impl.dataset :as ds-impl]
[tech.v3.dataset.impl.column :as col-impl]
[tech.v3.dataset.impl.column :as col-impl :refer [Column]]
[tech.v3.dataset.protocols :as ds-proto]
[tech.v3.dataset.io.column-parsers :as col-parsers]
[base64-js :as b64]
Expand Down Expand Up @@ -634,11 +634,12 @@ user> (ds/missing (*1 :c))
(defn- numeric-data->b64
[data]
(-> (clone data)
(dtype/ensure-typed-array)
(aget "buffer")
(js/Uint8Array.)
(b64/fromByteArray)))
(let [data (clone data)
data (dtype/ensure-typed-array data)]
(-> data
(.-buffer)
(js/Uint8Array.)
(b64/fromByteArray))))
(defn- string-col->data
Expand Down Expand Up @@ -669,7 +670,7 @@ user> (ds/missing (*1 :c))
(numeric-data->b64)))
(defn- col->data
(defn column->data
[col]
(let [col-dt (dtype/elemwise-datatype col)]
{:metadata (meta col)
Expand All @@ -678,17 +679,17 @@ user> (ds/missing (*1 :c))
:data
(cond
(dtype/numeric-type? col-dt)
(numeric-data->b64 (aget col "buf"))
(numeric-data->b64 (ds-proto/-column-buffer col))
(= :boolean col-dt)
(numeric-data->b64 (dtype/make-container :uint8 (aget col "buf")))
(numeric-data->b64 (dtype/make-container :uint8 (ds-proto/-column-buffer col)))
(= :string col-dt)
(string-col->data col)
(= :local-date col-dt)
(obj-col->numeric-b64 col :int32 dtype-dt/local-date->epoch-days)
(= :instant col-dt)
(obj-col->numeric-b64 col :int64 dtype-dt/instant->epoch-milliseconds)
:else
(dtype/as-js-array (dtype/make-container :object (aget col "buf"))))}))
(dtype/as-js-array (dtype/make-container :object (ds-proto/-column-buffer col))))}))
(defn dataset->data
Expand All @@ -698,13 +699,13 @@ user> (ds/missing (*1 :c))
{:metadata (meta ds)
:flavor :transit
:version 1
:columns (mapv col->data (columns ds))})
:columns (mapv column->data (columns ds))})
(defn- b64->numeric-data
[b64data dtype]
(let [bdata (-> (b64/toByteArray b64data)
(aget "buffer"))]
(.-buffer))]
(case dtype
:int8 (js/Int8Array. bdata)
:uint8 bdata
Expand All @@ -728,43 +729,46 @@ user> (ds/missing (*1 :c))
coldata))
(defn data->column
  "Convert one serialized column map - a map with `:metadata`, `:missing` and
  `:data` keys - into a map of `:tech.v3.dataset/`-namespaced keys
  (`:metadata`, `:missing`, `:force-datatype?`, `:data`, `:name`).

  The decoding of `:data` is chosen by `(:datatype metadata)`:

  * numeric types  - base64 string decoded via `b64->numeric-data`.
  * `:boolean`     - base64 string decoded to bytes, wrapped as a boolean array.
  * `:string`      - decoded via `str-data->coldata`.
  * `:local-date`  - base64 int32 epoch-days mapped to local dates.
  * `:instant`     - base64 int64 epoch-milliseconds mapped to instants.
  * anything else  - used in place when `data` is counted and indexed,
                     otherwise copied into a new container of the datatype.

  `:force-datatype?` is always true so the datatype recorded in the metadata
  is used as-is. The column name is taken from `(:name metadata)`."
  [{:keys [metadata missing data]}]
  (let [dtype (:datatype metadata)]
    #:tech.v3.dataset{:metadata metadata
                      :missing (dtype/->js-set missing)
                      ;;do not re-scan data.
                      :force-datatype? true
                      :data
                      (cond
                        (dtype/numeric-type? dtype)
                        (b64->numeric-data data dtype)
                        (= :boolean dtype)
                        (arrays/make-boolean-array (b64/toByteArray data))
                        (= :string dtype)
                        (str-data->coldata data)
                        (= :local-date dtype)
                        (->> (b64->numeric-data data :int32)
                             (dtype/emap dtype-dt/epoch-days->local-date :local-date))
                        (= :instant dtype)
                        (->> (b64->numeric-data data :int64)
                             ;;int64 data comes out as js/bigints.  js/Number is called
                             ;;as a function (not as a constructor via `js/Number.`) so
                             ;;the result is a primitive number, not a boxed Number object.
                             (dtype/emap #(dtype-dt/epoch-milliseconds->instant (js/Number %))
                                         :instant))
                        :else
                        (if (and (dtype/counted? data)
                                 (dtype/indexed? data))
                          ;;access data in place
                          (arrays/make-typed-buffer data dtype)
                          (dtype/make-container dtype data)))
                      :name (:name metadata)}))
(defn data->dataset
[ds-data]
(when-not (and (contains? ds-data :metadata)
(contains? ds-data :columns))
(throw (js/Error. "This does not seem like dataset data, missing required keys")))
(->> (:columns ds-data)
(map
(fn [{:keys [metadata missing data]}]
(let [dtype (:datatype metadata)]
#:tech.v3.dataset{:metadata metadata
:missing (dtype/->js-set missing)
;;do not re-scan data.
:force-datatype? true
:data
(cond
(dtype/numeric-type? dtype)
(b64->numeric-data data dtype)
(= :boolean dtype)
(arrays/make-boolean-array (b64/toByteArray data))
(= :string dtype)
(str-data->coldata data)
(= :local-date dtype)
(->> (b64->numeric-data data :int32)
(dtype/emap dtype-dt/epoch-days->local-date :local-date))
(= :instant dtype)
(->> (b64->numeric-data data :int64)
;;int64 data comes out as js/bigints
(dtype/emap #(-> (js/Number. %)
(dtype-dt/epoch-milliseconds->instant))
:instant))
:else
(if (and (dtype/counted? data)
(dtype/indexed? data))
;;access data in place
(arrays/make-typed-buffer data dtype)
(dtype/make-container dtype data)))
:name (:name metadata)})))
(map data->column)
(ds-impl/new-dataset (:metadata ds-data))))
Expand Down
1 change: 1 addition & 0 deletions src/tech/v3/dataset/impl/column.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@
(new-column new-buf new-missing (meta col) numeric?)))
ds-proto/PColumn
(-is-column? [this] true)
(-column-buffer [this] buf)
ds-proto/PRowCount
(-row-count [this] (count buf))
ds-proto/PMissing
Expand Down
3 changes: 2 additions & 1 deletion src/tech/v3/dataset/protocols.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
(-select-columns [this colnames]))

(defprotocol PColumn
(-is-column? [col]))
(-is-column? [col])
(-column-buffer [col]))


(defprotocol PDataset
Expand Down
54 changes: 32 additions & 22 deletions src/tech/v3/datatype/argops.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -28,28 +28,38 @@

(defn argfilter
"Return an array of indexes that pass the filter."
[pred data]
(let [data (dt-base/ensure-indexable data)
n-data (count data)]
(case :list-filter
:ary-filter
(let [indexes (dt-cmc/make-container :int32 (range n-data))
idx-ary (dt-base/as-typed-array indexes)]
(if-let [data (dt-base/as-agetable data)]
(.filter idx-ary #(boolean (pred (aget data %))))
(.filter idx-ary #(boolean (pred (nth data %)))))
indexes)
:list-filter
(let [indexes (dt-list/make-list :int32)
n-data (count data)]
(if-let [data (dt-base/as-agetable data)]
(dotimes [idx n-data]
(when (pred (aget data idx))
(dt-proto/-add indexes idx)))
(dotimes [idx n-data]
(when (pred (nth data idx))
(dt-proto/-add indexes idx))))
indexes))))
([pred data]
(let [data (dt-base/ensure-indexable data)
n-data (count data)]
(case :list-filter
:ary-filter
(let [indexes (dt-cmc/make-container :int32 (range n-data))
idx-ary (dt-base/as-typed-array indexes)]
(if-let [data (dt-base/as-agetable data)]
(.filter idx-ary #(boolean (pred (aget data %))))
(.filter idx-ary #(boolean (pred (nth data %)))))
indexes)
:list-filter
(let [indexes (dt-list/make-list :int32)
n-data (count data)]
(if-let [data (dt-base/as-agetable data)]
(dotimes [idx n-data]
(when (pred (aget data idx))
(dt-proto/-add indexes idx)))
(dotimes [idx n-data]
(when (pred (-nth data idx))
(dt-proto/-add indexes idx))))
indexes))))
;;Single-argument arity: each element of data is itself tested for truthiness
;;and the indexes of the truthy elements are returned.
;;Avoids the use of an unnecessary predicate fn.
([data]
(let [data (dt-base/ensure-indexable data)
n-data (count data)
indexes (dt-list/make-list :int32)]
(dotimes [idx n-data]
(when (-nth data idx)
(dt-proto/-add indexes idx)))
indexes)))


(defn arggroup
Expand Down
6 changes: 6 additions & 0 deletions testapp/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
node_modules
.cpcache
target
resources/public
package.json
package-lock.json
10 changes: 10 additions & 0 deletions testapp/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Test App for tmdjs

## Usage

1. Create package.json - `clj -M:cljs-install -m cljs.main --install-deps`.
2. Install the npm dependencies - `npm install`.
3. Compile the ClojureScript app - `clj -M:cljs compile app`.


Now you can run the server from the main namespace (`testapp.main`) and check out the timings.
39 changes: 39 additions & 0 deletions testapp/deps.edn
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{:paths ["src" "resources"]
:deps {com.cnuernber/tmdjs {:mvn/version "1.00-beta-1-SNAPSHOT"}
http-kit/http-kit {:mvn/version "2.5.3"}
hiccup/hiccup {:mvn/version "1.0.5"}
bidi/bidi {:mvn/version "2.1.6"}
ring/ring {:mvn/version "1.7.0"}
ring/ring-codec {:mvn/version "1.1.3"}
metosin/muuntaja {:mvn/version "0.6.8"}
amalloy/ring-gzip-middleware {:mvn/version "0.1.4"}}
:aliases
{:cljs
{:extra-deps {thheller/shadow-cljs {:mvn/version "2.12.4"}
cider/cider-nrepl {:mvn/version "0.26.0"}
cljs-ajax/cljs-ajax {:mvn/version "0.8.3"}
re-frame/re-frame {:mvn/version "1.2.0"}}
;;dev-resources contains logback.xml which disables annoying jboss logging
:extra-paths ["dev-resources"]
:main-opts ["-m" "shadow.cljs.devtools.cli"]}

;;used for installing base package.json
;;clj -M:cljs-install -m cljs.main --install-deps
:cljs-install
{:extra-deps {thheller/shadow-cljs {:mvn/version "2.12.4"}
cljs-ajax/cljs-ajax {:mvn/version "0.8.0"}
re-frame/re-frame {:mvn/version "1.2.0"}}}
:standalone-server
{:replace-deps {com.github.seancorfield/depstar {:mvn/version "2.0.193"}}
:ns-default hf.depstar
:exec-fn hf.depstar/uberjar
:exec-args {:group-id "com.cnuernber"
:artifact-id "testapp"
:version "1.00-beta-1"
:sync-pom true
:aot true
:main-class testapp.main
:jar "target/testapp.jar"
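;;Ensure direct linking and skip clojure.spec macro checks.
;;NOTE(review): no option below disables tensor code generation - add the flag or confirm it is not needed.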
:jvm-opts ["-Dclojure.compiler.direct-linking=true"
"-Dclojure.spec.skip-macros=true"]}}}}
13 changes: 13 additions & 0 deletions testapp/dev-resources/logback.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<configuration debug="false">
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<!-- encoders are assigned the type
ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
<encoder>
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>

<root level="info">
<appender-ref ref="STDOUT" />
</root>
</configuration>
11 changes: 11 additions & 0 deletions testapp/shadow-cljs.edn
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{:deps true
;; set an nrepl port for connection to a REPL.
:nrepl {:port 8777}
:builds {;; example build config, usage suitable for user apps
:app {:target :browser
;;module :app will output data to app.js
:modules {:app {:init-fn testapp.webapp/init}}
:output-dir "resources/public/js"
:asset-path "js"
:devtools {:http-root "resources/public"
:http-port 8700}}}}
Loading

0 comments on commit fe15ce0

Please sign in to comment.