Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace deleted elements at addition #418

Merged
28 commits merged on Jan 12, 2023
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Previous commit
Next commit
Refactoring
  • Loading branch information
Dmitry Yashunin committed Dec 17, 2022
commit 01bd9d08cfa3bb5570fea5ab186e6effefa30849
20 changes: 10 additions & 10 deletions hnswlib/hnswalg.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
mutable std::atomic<long> metric_distance_computations{0};
mutable std::atomic<long> metric_hops{0};

bool replace_deleted_ = false; // flag to replace deleted elements (marked as deleted) during insertions
bool allow_replace_deleted_ = false; // flag to replace deleted elements (marked as deleted) during insertions

std::mutex deleted_elements_lock; // lock for deleted_elements
std::unordered_set<tableint> deleted_elements; // contains internal ids of deleted elements
Expand All @@ -79,8 +79,8 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
const std::string &location,
bool nmslib = false,
size_t max_elements = 0,
bool replace_deleted = false)
: replace_deleted_(replace_deleted) {
bool allow_replace_deleted = false)
: allow_replace_deleted_(allow_replace_deleted) {
loadIndex(location, s, max_elements);
}

Expand All @@ -91,11 +91,11 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
size_t M = 16,
size_t ef_construction = 200,
size_t random_seed = 100,
bool replace_deleted = false)
bool allow_replace_deleted = false)
: link_list_locks_(max_elements),
label_op_locks_(MAX_LABEL_OPERATION_LOCKS),
element_levels_(max_elements),
replace_deleted_(replace_deleted) {
allow_replace_deleted_(allow_replace_deleted) {
max_elements_ = max_elements;
num_deleted_ = 0;
data_size_ = s->get_data_size();
Expand Down Expand Up @@ -707,7 +707,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
for (size_t i = 0; i < cur_element_count; i++) {
if (isMarkedDeleted(i)) {
num_deleted_ += 1;
if (replace_deleted_) deleted_elements.insert(i);
if (allow_replace_deleted_) deleted_elements.insert(i);
}
}

Expand Down Expand Up @@ -771,7 +771,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
unsigned char *ll_cur = ((unsigned char *)get_linklist0(internalId))+2;
*ll_cur |= DELETE_MARK;
num_deleted_ += 1;
if (replace_deleted_) {
if (allow_replace_deleted_) {
std::unique_lock <std::mutex> lock_deleted_elements(deleted_elements_lock);
deleted_elements.insert(internalId);
}
Expand Down Expand Up @@ -813,7 +813,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
unsigned char *ll_cur = ((unsigned char *)get_linklist0(internalId)) + 2;
*ll_cur &= ~DELETE_MARK;
num_deleted_ -= 1;
if (replace_deleted_) {
if (allow_replace_deleted_) {
std::unique_lock <std::mutex> lock_deleted_elements(deleted_elements_lock);
deleted_elements.erase(internalId);
}
Expand Down Expand Up @@ -847,7 +847,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
* If replacement of deleted elements is enabled: replaces previously deleted point if any, updating it with new point
*/
void addPoint(const void *data_point, labeltype label, bool replace_deleted = false) {
if ((replace_deleted_ == false) && (replace_deleted == true)) {
if ((allow_replace_deleted_ == false) && (replace_deleted == true)) {
throw std::runtime_error("Replacement of deleted elements is disabled in constructor");
}

Expand Down Expand Up @@ -1053,7 +1053,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
auto search = label_lookup_.find(label);
if (search != label_lookup_.end()) {
tableint existingInternalId = search->second;
if (replace_deleted_) {
if (allow_replace_deleted_) {
if (isMarkedDeleted(existingInternalId)) {
throw std::runtime_error("Can't use addPoint to update deleted elements if replacement of deleted elements is enabled.");
}
Expand Down
24 changes: 12 additions & 12 deletions python_bindings/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,12 +194,12 @@ class Index {
size_t M,
size_t efConstruction,
size_t random_seed,
bool replace_deleted) {
bool allow_replace_deleted) {
if (appr_alg) {
throw std::runtime_error("The index is already initiated.");
}
cur_l = 0;
appr_alg = new hnswlib::HierarchicalNSW<dist_t>(l2space, maxElements, M, efConstruction, random_seed, replace_deleted);
appr_alg = new hnswlib::HierarchicalNSW<dist_t>(l2space, maxElements, M, efConstruction, random_seed, allow_replace_deleted);
index_inited = true;
ep_added = false;
appr_alg->ef_ = default_ef;
Expand All @@ -224,12 +224,12 @@ class Index {
}


void loadIndex(const std::string &path_to_index, size_t max_elements, bool replace_deleted) {
void loadIndex(const std::string &path_to_index, size_t max_elements, bool allow_replace_deleted) {
if (appr_alg) {
std::cerr << "Warning: Calling load_index for an already inited index. Old index is being deallocated." << std::endl;
delete appr_alg;
}
appr_alg = new hnswlib::HierarchicalNSW<dist_t>(l2space, path_to_index, false, max_elements, replace_deleted);
appr_alg = new hnswlib::HierarchicalNSW<dist_t>(l2space, path_to_index, false, max_elements, allow_replace_deleted);
cur_l = appr_alg->cur_element_count;
index_inited = true;
}
Expand Down Expand Up @@ -415,7 +415,7 @@ class Index {
"ef"_a = appr_alg->ef_,
"has_deletions"_a = (bool)appr_alg->num_deleted_,
"size_links_per_element"_a = appr_alg->size_links_per_element_,
"replace_deleted"_a = appr_alg->replace_deleted_,
"allow_replace_deleted"_a = appr_alg->allow_replace_deleted_,

"label_lookup_external"_a = py::array_t<hnswlib::labeltype>(
{ appr_alg->label_lookup_.size() }, // shape
Expand Down Expand Up @@ -578,19 +578,19 @@ class Index {
}

// process deleted elements
bool replace_deleted = false;
if (d.contains("replace_deleted")) {
replace_deleted = d["replace_deleted"].cast<bool>();
bool allow_replace_deleted = false;
if (d.contains("allow_replace_deleted")) {
allow_replace_deleted = d["allow_replace_deleted"].cast<bool>();
}
appr_alg->replace_deleted_= replace_deleted;
appr_alg->allow_replace_deleted_= allow_replace_deleted;

appr_alg->num_deleted_ = 0;
bool has_deletions = d["has_deletions"].cast<bool>();
if (has_deletions) {
for (size_t i = 0; i < appr_alg->cur_element_count; i++) {
if (appr_alg->isMarkedDeleted(i)) {
appr_alg->num_deleted_ += 1;
if (replace_deleted) appr_alg->deleted_elements.insert(i);
if (allow_replace_deleted) appr_alg->deleted_elements.insert(i);
}
}
}
Expand Down Expand Up @@ -886,7 +886,7 @@ PYBIND11_PLUGIN(hnswlib) {
py::arg("M") = 16,
py::arg("ef_construction") = 200,
py::arg("random_seed") = 100,
py::arg("replace_deleted") = false)
py::arg("allow_replace_deleted") = false)
.def("knn_query",
&Index<float>::knnQuery_return_numpy,
py::arg("data"),
Expand All @@ -908,7 +908,7 @@ PYBIND11_PLUGIN(hnswlib) {
&Index<float>::loadIndex,
py::arg("path_to_index"),
py::arg("max_elements") = 0,
py::arg("replace_deleted") = false)
py::arg("allow_replace_deleted") = false)
.def("mark_deleted", &Index<float>::markDeleted, py::arg("label"))
.def("unmark_deleted", &Index<float>::unmarkDeleted, py::arg("label"))
.def("resize_index", &Index<float>::resizeIndex, py::arg("new_size"))
Expand Down
4 changes: 2 additions & 2 deletions python_bindings/tests/bindings_test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def testRandomSelf(self):

# Declaring index
hnsw_index = hnswlib.Index(space='l2', dim=dim)
hnsw_index.init_index(max_elements=max_num_elements, ef_construction=200, M=16, replace_deleted=True)
hnsw_index.init_index(max_elements=max_num_elements, ef_construction=200, M=16, allow_replace_deleted=True)

hnsw_index.set_ef(100)
hnsw_index.set_num_threads(4)
Expand Down Expand Up @@ -106,7 +106,7 @@ def testRandomSelf(self):
hnsw_index = hnswlib.Index(space='l2', dim=dim) # the space can be changed - keeps the data, alters the distance function.
hnsw_index.set_num_threads(4)
print(f"Loading index from {index_path}")
hnsw_index.load_index(index_path, max_elements=max_num_elements, replace_deleted=True)
hnsw_index.load_index(index_path, max_elements=max_num_elements, allow_replace_deleted=True)

# Insert batch 4
print("Inserting batch 4 by replacing deleted elements")
Expand Down
2 changes: 1 addition & 1 deletion python_bindings/tests/bindings_test_stress_mt_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def testRandomSelf(self):
# Declaring index
for _ in range(100):
hnsw_index = hnswlib.Index(space='l2', dim=dim)
hnsw_index.init_index(max_elements=max_num_elements, ef_construction=200, M=16, replace_deleted=True)
hnsw_index.init_index(max_elements=max_num_elements, ef_construction=200, M=16, allow_replace_deleted=True)

hnsw_index.set_ef(100)
hnsw_index.set_num_threads(50)
Expand Down