Skip to content

Commit

Permalink
fix: fix storage options for dataset builder
Browse files Browse the repository at this point in the history
  • Loading branch information
chenkovsky committed Nov 23, 2024
1 parent 1d3b204 commit 1123e5e
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 1 deletion.
19 changes: 19 additions & 0 deletions python/python/tests/test_fragment.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The Lance Authors

import copy
import json
import multiprocessing
import uuid
Expand All @@ -23,6 +24,15 @@
from lance.fragment import write_fragments
from lance.progress import FileSystemFragmentWriteProgress

# Storage options passed to Lance by the S3 integration test(s) in this file.
# Both endpoints point at services on localhost and are reached over plain
# HTTP, which is why "allow_http" is enabled. All values are strings.
CONFIG = {
    "allow_http": "true",
    "aws_access_key_id": "ACCESSKEY",
    "aws_secret_access_key": "SECRETKEY",
    "aws_endpoint": "http://localhost:9000",
    "dynamodb_endpoint": "http://localhost:8000",
    "aws_region": "us-west-2",
}


def test_write_fragment(tmp_path: Path):
with pytest.raises(OSError):
Expand Down Expand Up @@ -354,3 +364,12 @@ def test_create_from_file(tmp_path):
assert dataset.count_rows() == 1600
assert len(dataset.get_fragments()) == 1
assert dataset.get_fragments()[0].fragment_id == 2


@pytest.mark.integration
def test_append(s3_bucket: str):
    """Create a fragment under an ``s3://`` URI with explicit storage options.

    A two-row table is written to ``test_append.lance`` inside the bucket
    supplied by the ``s3_bucket`` fixture. The test passes as long as
    ``LanceFragment.create`` does not raise; the returned value is not
    inspected.
    """
    data = pa.table({"a": [1, 2], "b": ["a", "b"]})
    # Work on a private copy so the module-level CONFIG is never mutated.
    options = copy.deepcopy(CONFIG)
    target_uri = f"s3://{s3_bucket}/test_append.lance"
    lance.fragment.LanceFragment.create(target_uri, data, storage_options=options)
10 changes: 9 additions & 1 deletion rust/lance/src/dataset/fragment/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,15 @@ impl<'a> FragmentCreateBuilder<'a> {
}

async fn existing_dataset_schema(&self) -> Result<Option<Schema>> {
match DatasetBuilder::from_uri(self.dataset_uri).load().await {
let mut builder = DatasetBuilder::from_uri(self.dataset_uri);
let storage_options = self
.write_params
.and_then(|p| p.store_params.as_ref())
.and_then(|p| p.storage_options.clone());
if let Some(storage_options) = storage_options {
builder = builder.with_storage_options(storage_options);
}
match builder.load().await {
Ok(dataset) => {
// Use the schema from the dataset, because it has the correct
// field ids.
Expand Down

0 comments on commit 1123e5e

Please sign in to comment.