8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.14.27](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.27) - 2022-11-04

### Added
- Support for scene-level external evaluation functions
- Support for uploading custom scene-level metrics


## [0.14.26](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.26) - 2022-11-01

### Added
@@ -27,6 +34,7 @@ dataset.get_scene_from_item_ref_id(some_item['item'].reference_id)
- `slice.type == 'object'` => list of `Annotation`/`Prediction` objects
- `slice.type == 'scene'` => list of `Scene` objects


## [0.14.24](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.24) - 2022-10-19

### Fixed
1 change: 1 addition & 0 deletions nucleus/dataset_item.py
@@ -158,6 +158,7 @@ def from_json(cls, payload: dict):

if BACKEND_REFERENCE_ID_KEY in payload:
payload[REFERENCE_ID_KEY] = payload[BACKEND_REFERENCE_ID_KEY]

return cls(
image_location=image_url,
pointcloud_location=pointcloud_url,
5 changes: 4 additions & 1 deletion nucleus/validate/client.py
@@ -3,7 +3,7 @@
from nucleus.connection import Connection
from nucleus.job import AsyncJob

from .constants import EVAL_FUNCTION_KEY, SCENARIO_TEST_ID_KEY
from .constants import EVAL_FUNCTION_KEY, SCENARIO_TEST_ID_KEY, EntityLevel
from .data_transfer_objects.eval_function import (
CreateEvalFunction,
EvalFunctionEntry,
@@ -205,13 +205,15 @@ def metrics(self, model_id: str):
def create_external_eval_function(
self,
name: str,
level: EntityLevel = EntityLevel.ITEM,
) -> EvalFunctionEntry:
"""Creates a new external evaluation function. This external function can be used to upload evaluation
results with functions defined and computed by the customer, without having to share the source code of the
respective function.

Args:
name: unique name of evaluation function
level: level at which the eval function is run, defaults to "item"

Raises:
- NucleusAPIError if the creation of the function fails on the server side
Expand All @@ -228,6 +230,7 @@ def create_external_eval_function(
is_external_function=True,
serialized_fn=None,
raw_source=None,
level=level,
).dict(),
"validate/eval_fn",
)
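For reference, a minimal sketch (not part of the diff) of how the new `level` argument could be used once released. `NucleusClient` and the `client.validate` accessor are assumed from the wider library; the API key and function name are placeholders, and `EntityLevel` comes from the constants module added below.

```python
# Hedged sketch: registering a scene-level external eval function.
from nucleus import NucleusClient
from nucleus.validate.constants import EntityLevel

client = NucleusClient("YOUR_SCALE_API_KEY")  # placeholder API key

# Item level stays the default; pass EntityLevel.SCENE for scene-level tests.
scene_fn = client.validate.create_external_eval_function(
    name="external_scene_metric",  # hypothetical unique name
    level=EntityLevel.SCENE,
)
print(scene_fn)  # EvalFunctionEntry describing the registered function
```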
7 changes: 7 additions & 0 deletions nucleus/validate/constants.py
@@ -20,3 +20,10 @@ class ThresholdComparison(str, Enum):
GREATER_THAN_EQUAL_TO = "greater_than_equal_to"
LESS_THAN = "less_than"
LESS_THAN_EQUAL_TO = "less_than_equal_to"


class EntityLevel(str, Enum):
"""Level for evaluation functions and unit tests."""

ITEM = "item"
SCENE = "scene"
1 change: 1 addition & 0 deletions nucleus/validate/data_transfer_objects/eval_function.py
@@ -91,6 +91,7 @@ class CreateEvalFunction(ImmutableModel):
is_external_function: bool
serialized_fn: Optional[str] = None
raw_source: Optional[str] = None
level: Optional[str] = None

@validator("name")
def name_is_valid(cls, v): # pylint: disable=no-self-argument
nucleus/validate/data_transfer_objects/scenario_test_evaluations.py
@@ -1,15 +1,32 @@
from typing import List
from typing import Optional

from pydantic import validator
from pydantic import root_validator, validator

from nucleus.pydantic_base import ImmutableModel


class EvaluationResult(ImmutableModel):
item_ref_id: str
score: float
item_ref_id: Optional[str] = None
scene_ref_id: Optional[str] = None
score: float = 0
weight: float = 1

@root_validator()
def is_item_or_scene_provided(
cls, values
): # pylint: disable=no-self-argument
if (
values.get("item_ref_id") is None
and values.get("scene_ref_id") is None
) or (
(
values.get("item_ref_id") is not None
and values.get("scene_ref_id") is not None
)
):
raise ValueError("Must provide either item_ref_id or scene_ref_id")
return values

@validator("score", "weight")
def is_normalized(cls, v): # pylint: disable=no-self-argument
if 0 <= v <= 1:
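A quick, hedged illustration of the contract added above (not part of the diff): exactly one of `item_ref_id` / `scene_ref_id` must be set, and `score` / `weight` must lie in [0, 1]. The reference IDs are hypothetical; the import path follows the module shown in this hunk.

```python
from nucleus.validate.data_transfer_objects.scenario_test_evaluations import (
    EvaluationResult,
)

item_result = EvaluationResult(item_ref_id="img_0001", score=0.87)                  # item-level
scene_result = EvaluationResult(scene_ref_id="scene_0001", score=0.42, weight=0.5)  # scene-level

try:
    EvaluationResult(score=0.5)   # neither ref id set -> rejected by the root_validator
except ValueError as err:         # pydantic v1 ValidationError subclasses ValueError
    print(err)
```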
44 changes: 38 additions & 6 deletions nucleus/validate/scenario_test.py
@@ -5,17 +5,19 @@
and have confidence that they’re always shipping the best model.
"""
from dataclasses import dataclass, field
from typing import List, Optional
from typing import List, Optional, Union

from ..connection import Connection
from ..constants import DATASET_ITEMS_KEY, NAME_KEY, SLICE_ID_KEY
from ..constants import DATASET_ITEMS_KEY, NAME_KEY, SCENES_KEY, SLICE_ID_KEY
from ..dataset_item import DatasetItem
from ..scene import Scene
from .constants import (
EVAL_FUNCTION_ID_KEY,
SCENARIO_TEST_ID_KEY,
SCENARIO_TEST_METRICS_KEY,
THRESHOLD_COMPARISON_KEY,
THRESHOLD_KEY,
EntityLevel,
ThresholdComparison,
)
from .data_transfer_objects.scenario_test_evaluations import EvaluationResult
@@ -162,16 +164,31 @@ def get_eval_history(self) -> List[ScenarioTestEvaluation]:
]
return evaluations

def get_items(self) -> List[DatasetItem]:
def get_items(
self, level: EntityLevel = EntityLevel.ITEM
) -> Union[List[DatasetItem], List[Scene]]:
"""Gets items within a scenario test at a given level, returning a list of DatasetItem or Scene objects.

Args:
level: :class:`EntityLevel`

Returns:
A list of :class:`DatasetItem` objects when ``level`` is ``EntityLevel.ITEM``, or a list of :class:`Scene` objects when ``level`` is ``EntityLevel.SCENE``.
"""
response = self.connection.get(
f"validate/scenario_test/{self.id}/items",
)
if level == EntityLevel.SCENE:
return [
Scene.from_json(scene, skip_validate=True)
for scene in response[SCENES_KEY]
]
return [
DatasetItem.from_json(item) for item in response[DATASET_ITEMS_KEY]
]

def set_baseline_model(self, model_id: str):
"""Set's a new baseline model for the ScenarioTest. In order to be eligible to be a baseline,
"""Sets a new baseline model for the ScenarioTest. In order to be eligible to be a baseline,
this scenario test must have been evaluated using that model. The baseline model's performance
is used as the threshold for all metrics against which other models are compared.

@@ -205,14 +222,28 @@ def upload_external_evaluation_results(
len(results) > 0
), "Submitting evaluation requires at least one result."

level = EntityLevel.ITEM
metric_per_ref_id = {}
weight_per_ref_id = {}
aggregate_weighted_sum = 0.0
aggregate_weight = 0.0

# aggregation based on https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
for r in results:
metric_per_ref_id[r.item_ref_id] = r.score
weight_per_ref_id[r.item_ref_id] = r.weight
# Ensure results are uploaded ONLY for items or ONLY for scenes
if r.scene_ref_id is not None:
level = EntityLevel.SCENE
if r.item_ref_id is not None and level == EntityLevel.SCENE:
raise ValueError(
"All evaluation results must either pertain to a scene_ref_id or an item_ref_id, not both."
)
ref_id = (
r.item_ref_id if level == EntityLevel.ITEM else r.scene_ref_id
)

# Aggregate scores and weights
metric_per_ref_id[ref_id] = r.score
weight_per_ref_id[ref_id] = r.weight
aggregate_weighted_sum += r.score * r.weight
aggregate_weight += r.weight

Expand All @@ -224,6 +255,7 @@ def upload_external_evaluation_results(
"overall_metric": aggregate_weighted_sum / aggregate_weight,
"model_id": model_id,
"slice_id": self.slice_id,
"level": level.value,
}
response = self.connection.post(
payload,
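To tie the pieces together, a hedged end-to-end sketch of a scene-level run: fetch the scenes backing a scenario test, score each with customer-side code, and upload the results. The scenario-test lookup, model ID, argument order, and the scoring stub are assumptions; only `get_items(level=...)`, `EvaluationResult(scene_ref_id=...)`, and the weighted-mean aggregation come from this diff.

```python
from nucleus import NucleusClient
from nucleus.validate.constants import EntityLevel
from nucleus.validate.data_transfer_objects.scenario_test_evaluations import (
    EvaluationResult,
)

client = NucleusClient("YOUR_SCALE_API_KEY")       # placeholder key
scenario_test = client.validate.scenario_tests[0]  # assumed lookup of an existing scene-level test
scene_fn = client.validate.create_external_eval_function(
    name="external_scene_metric", level=EntityLevel.SCENE
)

def score_scene(scene) -> float:
    """Stand-in for a customer-side metric; must return a value in [0, 1]."""
    return 0.5

scenes = scenario_test.get_items(level=EntityLevel.SCENE)
results = [
    EvaluationResult(scene_ref_id=scene.reference_id, score=score_scene(scene))
    for scene in scenes
]

# The payload carries per-scene scores plus the aggregate
#   overall_metric = sum(score * weight) / sum(weight)   (weights default to 1).
scenario_test.upload_external_evaluation_results(
    scene_fn, results, "YOUR_MODEL_ID"  # argument order assumed from the method above
)
```

Because every result here carries only a `scene_ref_id`, the method infers `EntityLevel.SCENE` and forwards `level: "scene"` in the payload.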
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -21,7 +21,7 @@ exclude = '''

[tool.poetry]
name = "scale-nucleus"
version = "0.14.26"
version = "0.14.27"
description = "The official Python client library for Nucleus, the Data Platform for AI"
license = "MIT"
authors = ["Scale AI Nucleus Team <[email protected]>"]
38 changes: 38 additions & 0 deletions tests/cli/conftest.py
@@ -25,6 +25,17 @@ def module_scope_datasets(CLIENT):
yield test_datasets


@pytest.fixture(scope="module")
def module_scope_scene_datasets(CLIENT):
test_scene_datasets = []
for i in range(3):
dataset_name = f"[PyTest] CLI {i} {get_uuid()} (Scene)"
test_scene_datasets.append(
CLIENT.create_dataset(dataset_name, is_scene=True)
)
yield test_scene_datasets


@pytest.fixture(scope="function")
def function_scope_dataset(CLIENT):
dataset = CLIENT.create_dataset(f"[PyTest] Dataset {get_uuid()}")
@@ -49,6 +60,11 @@ def populated_dataset(module_scope_datasets):
yield module_scope_datasets[0]


@pytest.fixture(scope="module")
def populated_scene_dataset(module_scope_scene_datasets):
yield module_scope_scene_datasets[0]


@pytest.fixture(scope="module")
def model(module_scope_models):
yield module_scope_models[0]
@@ -76,6 +92,28 @@ def test_slice(populated_dataset, slice_items):
yield slc


@pytest.fixture(scope="module")
def scenes(populated_dataset):
items = make_dataset_items()
populated_dataset.append(items)
yield items


@pytest.fixture(scope="module")
def slice_scenes(scenes):
yield scenes[:2]


@pytest.fixture(scope="module")
def test_scene_slice(populated_scene_dataset, slice_scenes):
slice_name = "[PyTest] CLI Scene Slice"
slc = populated_scene_dataset.create_slice(
name=slice_name,
reference_ids=[scene.reference_id for scene in slice_scenes],
)
yield slc


@pytest.fixture(scope="module")
def annotations(populated_dataset, slice_items):
annotations = create_box_annotations(populated_dataset, slice_items)
7 changes: 7 additions & 0 deletions tests/test_dataset.py
@@ -28,6 +28,7 @@
)
from nucleus.errors import NucleusAPIError
from nucleus.job import AsyncJob, JobError
from nucleus.scene import LidarScene, VideoScene

from .helpers import (
DATASET_WITH_EMBEDDINGS,
@@ -36,9 +37,11 @@
TEST_CATEGORY_ANNOTATIONS,
TEST_DATASET_NAME,
TEST_IMG_URLS,
TEST_LIDAR_SCENES,
TEST_MULTICATEGORY_ANNOTATIONS,
TEST_POLYGON_ANNOTATIONS,
TEST_SEGMENTATION_ANNOTATIONS,
TEST_VIDEO_SCENES,
assert_partial_equality,
reference_id_from_url,
)
@@ -94,6 +97,10 @@ def make_dataset_items():
return ds_items_with_metadata


def make_scenes():
return [VideoScene.from_json(s) for s in TEST_VIDEO_SCENES["scenes"]]


def test_dataset_create_and_delete_no_scene(CLIENT):
# Creation
ds = CLIENT.create_dataset(TEST_DATASET_NAME)
52 changes: 51 additions & 1 deletion tests/validate/conftest.py
@@ -9,7 +9,7 @@
create_predictions,
get_uuid,
)
from tests.test_dataset import make_dataset_items
from tests.test_dataset import make_dataset_items, make_scenes


@pytest.fixture(scope="module")
@@ -40,6 +40,56 @@ def test_slice(validate_dataset, slice_items):
yield slc


@pytest.fixture(scope="module")
def module_scope_datasets(CLIENT):
test_datasets = []
for i in range(3):
dataset_name = f"[PyTest] CLI {i} {get_uuid()}"
test_datasets.append(
CLIENT.create_dataset(dataset_name, is_scene=False)
)
yield test_datasets


@pytest.fixture(scope="module")
def module_scope_scene_datasets(CLIENT):
test_scene_datasets = []
for i in range(3):
dataset_name = f"[PyTest] CLI {i} {get_uuid()} (Scene)"
test_scene_datasets.append(
CLIENT.create_dataset(dataset_name, is_scene=True)
)
yield test_scene_datasets


@pytest.fixture(scope="module")
def populated_scene_dataset(module_scope_scene_datasets):
yield module_scope_scene_datasets[0]


@pytest.fixture(scope="module")
def slice_scenes():
scenes = make_scenes()[:1]
yield scenes


@pytest.fixture(scope="module")
def scenes(populated_scene_dataset, slice_scenes):
job = populated_scene_dataset.append(slice_scenes, asynchronous=True)
job.sleep_until_complete()
yield slice_scenes


@pytest.fixture(scope="module")
def test_scene_slice(populated_scene_dataset, scenes):
slice_name = "[PyTest] CLI Scene Slice"
slc = populated_scene_dataset.create_slice(
name=slice_name,
reference_ids=[scene.reference_id for scene in scenes],
)
yield slc


@pytest.fixture(scope="module")
def model(CLIENT):
model_reference = "model_" + str(time.time())