From c8468e6b2be3b27092d961efa45a63d3b732547e Mon Sep 17 00:00:00 2001 From: Chris Nuernberger Date: Fri, 29 Oct 2021 15:34:50 -0600 Subject: [PATCH] snap --- deps.edn | 4 +-- docs/index.html | 2 +- docs/tech.v3.dataset.html | 38 ++++++++++----------- docs/tech.v3.datatype.argops.html | 14 ++++---- docs/tech.v3.datatype.functional.html | 2 +- docs/tech.v3.datatype.html | 2 +- docs/tech.v3.libs.cljs-ajax.html | 2 +- src/tech/v3/datatype/argops.cljs | 49 ++++++++++----------------- test/tech/v3/dataset_test.cljs | 21 ++++++++++++ 9 files changed, 71 insertions(+), 63 deletions(-) diff --git a/deps.edn b/deps.edn index 5ba33c3..343f8c8 100644 --- a/deps.edn +++ b/deps.edn @@ -1,7 +1,7 @@ {:paths ["src"] ;;We serialize datasets to transit-json :deps {com.cognitect/transit-cljs {:mvn/version "0.8.269"} - techascent/tech.ml.dataset {:mvn/version "6.025"} + techascent/tech.ml.dataset {:mvn/version "6.026"} tick/tick {:mvn/version "0.4.24-alpha"} com.cognitect/transit-clj {:mvn/version "1.0.324"}} :aliases @@ -21,7 +21,7 @@ :exec-fn hf.depstar/jar :exec-args {:group-id "com.cnuernber" :artifact-id "tmdjs" - :version "1.000-beta-19" + :version "1.000-beta-21-SNAPSHOT" :sync-pom true :jar "target/tmdjs.jar"}} ;;deploy to clojars diff --git a/docs/index.html b/docs/index.html index 394c76a..24e9ce9 100644 --- a/docs/index.html +++ b/docs/index.html @@ -1,6 +1,6 @@ - 1.000-beta-19

1.000-beta-19

Dataframe processing for ClojureScript.

Namespaces

tech.v3.dataset

Dataframe (map of columns) data processing system for clojurescript. + 1.000-beta-20

1.000-beta-20

Dataframe processing for ClojureScript.

Namespaces

tech.v3.datatype

Support for programming with arrays and a fast set implementation for index (int32) values. diff --git a/docs/tech.v3.dataset.html b/docs/tech.v3.dataset.html index c5ba20b..a2df718 100644 --- a/docs/tech.v3.dataset.html +++ b/docs/tech.v3.dataset.html @@ -1,6 +1,6 @@ -tech.v3.dataset documentation

tech.v3.dataset

Dataframe (map of columns) data processing system for clojurescript. +tech.v3.dataset documentation

tech.v3.dataset

Dataframe (map of columns) data processing system for clojurescript. This API is a simplified version of the jvm-version's api.

Datasets are maps of columns so assoc will add a new column and dissoc @@ -46,7 +46,7 @@

column

(column ds k)

Return the column at position k. Failing to find the column is an error.

column->data

(column->data col)

Transform a column into raw data safe for passing to transit or edn.

-

column-count

(column-count ds)

Integer column count of the dataset.

+

column-count

(column-count ds)

Integer column count of the dataset.

column-map

(column-map dataset result-colname map-fn res-dtype-or-opts filter-fn-or-ds)(column-map dataset result-colname map-fn filter-fn-or-ds)(column-map dataset result-colname map-fn)

Produce a new (or updated) column as the result of mapping a fn over columns.

  • dataset - dataset.
  • @@ -145,16 +145,16 @@ user> (ds/missing (*1 :c)) {0,1} -

column-names

(column-names ds)

Return the column names as a sequence.

+

column-names

(column-names ds)

Return the column names as a sequence.

columns

(columns ds)

Return the columns, in order, of the dataset.

-

concat

(concat ds & args)

This is a copying concatenation so the result will be realized. Missing columns +

concat

(concat ds & args)(concat)

This is a copying concatenation so the result will be realized. Missing columns will be filled in with missing values.

data->column

(data->column {:keys [metadata missing data]})

Transform data produced via column->data into a column

-

data->dataset

(data->dataset ds-data)

Given data produced via dataset->data create a new dataset.

-

dataset->data

(dataset->data ds)

Convert a dataset into a pure data datastructure safe for transit or direct json +

data->dataset

(data->dataset ds-data)

Given data produced via dataset->data create a new dataset.

+

dataset->data

(dataset->data ds)

Convert a dataset into a pure data datastructure safe for transit or direct json serialization. Uses base64 encoding of numeric data.

-

dataset->transit-str

(dataset->transit-str ds & [format handlers])

Write a transit string adding in the dataset write handler

-

dataset?

(dataset? ds)

Return true if this is a dataset.

+

dataset->transit-str

(dataset->transit-str ds & [format handlers])

Write a transit string adding in the dataset write handler

+

dataset?

(dataset? ds)

Return true if this is a dataset.

filter

(filter ds pred)

Filter the dataset. Pred gets passed each row as a map.

filter-column

(filter-column ds colname & [pred])

Filter the dataset by column colname. If pred isn't passed in the column's values are treated as truthy.

@@ -166,15 +166,15 @@
  • If filter-fn-or-ds is :all, all columns are returned
  • If filter-fn-or-ds is an instance of IFn, the dataset is passed into it.
  • -

    group-by

    (group-by ds f)

    Group the dataset by the values returned from passing f over each row, represented as a +

    group-by

    (group-by ds f)

    Group the dataset by the values returned from passing f over each row, represented as a map, of the dataset.

    group-by-column

    (group-by-column ds colname)

    Group the dataset by column colname

    head

    (head ds n)(head ds)

    Return the first n rows of the dataset.

    intersect-missing-sets

    (intersect-missing-sets col-seq)

    Intersect the missing sets of the columns

    -

    merge-by-column

    (merge-by-column lhs rhs colname)

    Merge rows assuming left, right have the same columns. Left is taken first then +

    merge-by-column

    (merge-by-column lhs rhs colname)

    Merge rows assuming left, right have the same columns. Left is taken first then any right rows that do not appear in left are appended. This is far less general but much faster than a join operation; it is useful for merging timeseries data.

    -

    missing

    (missing ds-or-col)

    Return the missing set as a clojure set. The underlying protocol returns +

    missing

    (missing ds-or-col)

    Return the missing set as a clojure set. The underlying protocol returns missing sets as js sets as those have superior performance when using numbers.

    remove-columns

    (remove-columns ds colnames)

    Remove these columns from the dataset.

    remove-rows

    (remove-rows ds rowidxs)

    Remove these row indexes out of the dataset.

    @@ -189,7 +189,7 @@ in the missing span and the number of missing elements. Either the first or last may be nil if the missing span is at the beginning or end. In the case where all values are missing both arguments may be nil.

    -

    row-at

    (row-at ds idx)

    Get row as a map at index idx. Negative indexes index from the end.

    +

    row-at

    (row-at ds idx)

    Get row as a map at index idx. Negative indexes index from the end.

    row-count

    (row-count ds-or-col)

    Integer row count of the dataset.

    row-map

    (row-map ds map-fn & [options])

    Map a function across the rows of the dataset producing a new dataset that is merged back into the original potentially replacing existing columns. @@ -219,7 +219,7 @@ | :MSFT | 2000-04-01 | 28.37 | 804.85690000 | | :MSFT | 2000-05-01 | 25.45 | 647.70250000 |] -

    rows

    (rows ds)

    Get a sequence of maps from a dataset

    +

    rows

    (rows ds)

    Get a sequence of maps from a dataset

    rowvec-at

    (rowvec-at ds idx)

    Get row as a vec of values at index idx. Negative indexes index from the end.

    rowvecs

    (rowvecs ds)

    Get a sequence of persistent vectors from a dataset

    select

    (select ds cols rows)

    Select a subrect of the dataset.

    @@ -243,11 +243,11 @@

    tail

    (tail ds n)(tail ds)

    Return the last n rows of the dataset.

    transit-file->dataset

    (transit-file->dataset fname)

    Given a file of transit data return a dataset. This only works on Node.

    -

    transit-read-handler-map

    (transit-read-handler-map)

    Return a map mapping the dataset tag to a transit read handler.

    -

    transit-str->dataset

    (transit-str->dataset json-data & [format handlers])

    Parse a transit string adding in the dataset read handler

    -

    transit-write-handler-map

    (transit-write-handler-map)

    Return a map mapping the dataset type to a transit writer handler.

    -

    union-missing-sets

    (union-missing-sets col-seq)

    Union the missing sets of the columns

    -

    unique-by

    (unique-by ds f)

    Unique-by taking first

    +

    transit-read-handler-map

    (transit-read-handler-map)

    Return a map mapping the dataset tag to a transit read handler.

    +

    transit-str->dataset

    (transit-str->dataset json-data & [format handlers])

    Parse a transit string adding in the dataset read handler

    +

    transit-write-handler-map

    (transit-write-handler-map)

    Return a map mapping the dataset type to a transit writer handler.

    +

    union-missing-sets

    (union-missing-sets col-seq)

    Union the missing sets of the columns

    +

    unique-by

    (unique-by ds f)

    Unique-by taking first

    unique-by-column

    (unique-by-column ds colname)

    Unique-by taking first

    update

    (update lhs-ds filter-fn-or-ds update-fn & args)

    Update this dataset. Filters this dataset into a new dataset, applies update-fn, then merges the result into original dataset.

    @@ -267,4 +267,4 @@ (ds/update-columnwise (cf/union (cf/numeric ds) (cf/boolean ds)) #(dtype/elemwise-cast % :float64))) -
    \ No newline at end of file +
    \ No newline at end of file diff --git a/docs/tech.v3.datatype.argops.html b/docs/tech.v3.datatype.argops.html index 533a98d..ff88c6b 100644 --- a/docs/tech.v3.datatype.argops.html +++ b/docs/tech.v3.datatype.argops.html @@ -1,13 +1,13 @@ -tech.v3.datatype.argops documentation

    tech.v3.datatype.argops

    Index-space algorithms. Implements a subset of the jvm-version.

    +tech.v3.datatype.argops documentation

    tech.v3.datatype.argops

    Index-space algorithms. Implements a subset of the jvm-version.

    argfilter

    (argfilter pred data)(argfilter data)

    Return an array of indexes that pass the filter.

    -

    arggroup

    (arggroup data)

    Return a map from value->indexes that hold that value.

    -

    arglast-every

    (arglast-every rdr pred)

    Return the last index where (pred (rdr idx) (rdr (dec idx))) was true by +

    arggroup

    (arggroup data)

    Return a map from value->indexes that hold that value.

    +

    arglast-every

    (arglast-every rdr pred)

    Return the last index where (pred (rdr idx) (rdr (dec idx))) was true by comparing every value and keeping track of the last index where pred was true.

    -

    argmax

    (argmax rdr)

    Return the last index of the max item in the reader.

    -

    argmin

    (argmin rdr)

    Return the last index of the min item in the reader.

    -

    argsort

    (argsort compare-fn options data)(argsort compare-fn data)(argsort data)

    Return an array of indexes that order the provided data by compare-fn. compare-fn must +

    argmax

    (argmax rdr)

    Return the last index of the max item in the reader.

    +

    argmin

    (argmin rdr)

    Return the last index of the min item in the reader.

    +

    argsort

    (argsort compare-fn options data)(argsort compare-fn data)(argsort data)

    Return an array of indexes that order the provided data by compare-fn. compare-fn must be a boolean function such as < or >. You can use a full custom comparator returning -1,0 or 1 by using the :comparator option.

      @@ -43,4 +43,4 @@
      • :comparator - a specific comparator to use; defaults to comparator.
      -
    \ No newline at end of file +
    \ No newline at end of file diff --git a/docs/tech.v3.datatype.functional.html b/docs/tech.v3.datatype.functional.html index 874b8d3..4e93b27 100644 --- a/docs/tech.v3.datatype.functional.html +++ b/docs/tech.v3.datatype.functional.html @@ -1,6 +1,6 @@ -tech.v3.datatype.functional documentation

    tech.v3.datatype.functional

    Simple math primitives.

    +tech.v3.datatype.functional documentation

    tech.v3.datatype.functional

    Simple math primitives.

    descriptive-statistics

    (descriptive-statistics stats v)

    Given a sequence of desired stats return a map of statname->value.

    Example:

    cljs.user> (dfn/descriptive-statistics [:min :max :mean :n-values] (range 10))
    diff --git a/docs/tech.v3.datatype.html b/docs/tech.v3.datatype.html
    index 3cccff9..358f577 100644
    --- a/docs/tech.v3.datatype.html
    +++ b/docs/tech.v3.datatype.html
    @@ -1,6 +1,6 @@
     
    -tech.v3.datatype documentation

    tech.v3.datatype

    Support for programming with arrays and a fast set implementation for index (int32) values. +tech.v3.datatype documentation

    tech.v3.datatype

    Support for programming with arrays and a fast set implementation for index (int32) values. For complex/higher order algorithms see tech.v3.datatype.argops. For mathematical primitives, see tech.v3.datatype.functional

    ->js-set

    (->js-set)(->js-set data)

    Create a javascript set. These have superior performance when dealing with numeric diff --git a/docs/tech.v3.libs.cljs-ajax.html b/docs/tech.v3.libs.cljs-ajax.html index e0972c3..64a80c0 100644 --- a/docs/tech.v3.libs.cljs-ajax.html +++ b/docs/tech.v3.libs.cljs-ajax.html @@ -1,6 +1,6 @@ -tech.v3.libs.cljs-ajax documentation

    tech.v3.libs.cljs-ajax

    Bindings to use the dataset handlers in cljs GET/POST calls.

    +tech.v3.libs.cljs-ajax documentation

    tech.v3.libs.cljs-ajax

    Bindings to use the dataset handlers in cljs GET/POST calls.

    add-java-time-handlers!

    (add-java-time-handlers!)

    Add handlers for java.time.LocalDate and java.time.Instant

    add-transit-io-handlers!

    (add-transit-io-handlers! datatype tag read-fn write-fn)

    GET

    (GET url options)

    Drop in replacement for cljs-ajax.core/GET

    opt-map

    (opt-map)

    Options map that must be included in the cljs-ajax request in order diff --git a/src/tech/v3/datatype/argops.cljs b/src/tech/v3/datatype/argops.cljs index 6dafa25..a1d3433 100644 --- a/src/tech/v3/datatype/argops.cljs +++ b/src/tech/v3/datatype/argops.cljs @@ -54,37 +54,24 @@ cljs.user> (argops/argsort nil ;;no compare fn nan-strategy (get options :nan-strategy :last)] ;;agetable is a major optimization for sorting. element access time means a lot ;;for a large nlogn op. - (if-let [data (dt-base/as-agetable data)] - (let [sort-fn (if (casting/numeric-type? (dt-base/elemwise-datatype data)) - (fn [lhs-idx rhs-idx] - (let [lhs (aget data lhs-idx) - rhs (aget data rhs-idx) - lhs-nan? (js/isNaN lhs) - rhs-nan? (js/isNaN rhs)] - (if (or lhs-nan? rhs-nan?) - (condp = nan-strategy - :exception - (throw (js/Error "NaN detected")) - :last (if lhs-nan? 1 -1) - :first (if lhs-nan? -1 1)) - (comp lhs rhs)))) - #(comp (aget data %1) (aget data %2)))] - (.sort idx-ary sort-fn)) - (let [sort-fn (if (casting/numeric-type? (dt-base/elemwise-datatype data)) - (fn [lhs-idx rhs-idx] - (let [lhs (nth data lhs-idx) - rhs (nth data rhs-idx) - lhs-nan? (js/isNaN lhs) - rhs-nan? (js/isNaN rhs)] - (if (or lhs-nan? rhs-nan?) - (condp = nan-strategy - :exception - (throw (js/Error "NaN detected")) - :last (if lhs-nan? 1 -1) - :first (if lhs-nan? -1 1 )) - (comp lhs rhs)))) - #(comp (nth data %1) (nth data %2)))] - (.sort idx-ary sort-fn))) + (let [aget-data (dt-base/as-agetable data) + get-fn (if aget-data aget nth) + missing? (if (casting/numeric-type? (dt-base/elemwise-datatype data)) + js/isNaN + nil?) + sort-fn (fn [lhs-idx rhs-idx] + (let [lhs (get-fn data lhs-idx) + rhs (get-fn data rhs-idx) + lhs-nan? (missing? lhs) + rhs-nan? (missing? rhs)] + (if (or lhs-nan? rhs-nan?) + (condp = nan-strategy + :exception + (throw (js/Error "NaN detected")) + :last (if lhs-nan? 1 -1) + :first (if lhs-nan? 
-1 1)) + (comp lhs rhs))))] + (.sort idx-ary sort-fn)) indexes)) ([compare-fn data] (argsort compare-fn nil data)) diff --git a/test/tech/v3/dataset_test.cljs b/test/tech/v3/dataset_test.cljs index d2dd0d9..caab8d8 100644 --- a/test/tech/v3/dataset_test.cljs +++ b/test/tech/v3/dataset_test.cljs @@ -219,3 +219,24 @@ (deftest ds-concat-nil-seq (is (nil? (apply ds/concat nil)))) + + +(deftest sorting-objects + (let [ds (ds/->dataset {:a [nil nil 1.0 2.0] + :b ["hey" "you" nil nil]})] + (is (= ["hey" "you" nil nil] + (->> (ds/sort-by-column ds :b nil {:nan-strategy :last}) + :b + (vec)))) + (is (= [nil nil "hey" "you"] + (->> (ds/sort-by-column ds :b nil {:nan-strategy :first}) + :b + (vec)))) + (is (nan-eq [1 2 ##NaN ##NaN] + (->> (ds/sort-by-column ds :a nil {:nan-strategy :last}) + :a + (vec)))) + (is (nan-eq [##NaN ##NaN 1 2] + (->> (ds/sort-by-column ds :a nil {:nan-strategy :first}) + :a + (vec))))))