Replace deleted elements at addition #418

Merged (28 commits) on Jan 12, 2023
Changes from 1 commit:
Remove previous element update locks, as we now have locks by label
Dmitry Yashunin committed Nov 26, 2022
commit c750df8385354a2bb003c04bec5ea99a55972e71
hnswlib/hnswalg.h (20 changes: 0 additions & 20 deletions)

@@ -16,7 +16,6 @@ typedef unsigned int linklistsizeint;
 template<typename dist_t>
 class HierarchicalNSW : public AlgorithmInterface<dist_t> {
  public:
-    static const tableint MAX_ELEMENT_UPDATE_LOCKS = 65536;
     static const tableint MAX_LABEL_OPERATION_LOCKS = 65536;
     static const unsigned char DELETE_MARK = 0x01;

@@ -36,10 +35,6 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {

     VisitedListPool *visited_list_pool_{nullptr};

-    // Locks to prevent race condition during update/insert of an element at same time.
-    // Note: Locks for additions can also be used to prevent this race condition
-    // if the querying of KNN is not exposed along with update/inserts i.e multithread insert/update/query in parallel.
-    mutable std::vector<std::mutex> element_update_locks_;
     // Locks operations with element by label value
     mutable std::vector<std::mutex> label_op_locks_;

@@ -98,7 +93,6 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
             size_t random_seed = 100,
             bool replace_deleted = false)
             : link_list_locks_(max_elements),
-                element_update_locks_(MAX_ELEMENT_UPDATE_LOCKS),
                 label_op_locks_(MAX_LABEL_OPERATION_LOCKS),
                 element_levels_(max_elements),
                 replace_deleted_(replace_deleted) {

@@ -167,13 +161,6 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
     }


-    inline std::mutex& getUpdateElMutex(tableint internal_id) const {
-        // calculate hash
-        size_t lock_id = internal_id & (MAX_ELEMENT_UPDATE_LOCKS - 1);
-        return element_update_locks_[lock_id];
-    }
-
-
     inline std::mutex& getLabelOpMutex(labeltype label) const {
         // calculate hash
         size_t lock_id = label & (MAX_LABEL_OPERATION_LOCKS - 1);

@@ -691,7 +678,6 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {

         size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint);
         std::vector<std::mutex>(max_elements).swap(link_list_locks_);
-        std::vector<std::mutex>(MAX_ELEMENT_UPDATE_LOCKS).swap(element_update_locks_);
         std::vector<std::mutex>(MAX_LABEL_OPERATION_LOCKS).swap(label_op_locks_);

         visited_list_pool_ = new VisitedListPool(1, max_elements);

@@ -1081,16 +1067,12 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
         if (search != label_lookup_.end()) {
             tableint existingInternalId = search->second;
             if (replace_deleted_) {
-                // wait for element addition or update
-                std::unique_lock <std::mutex> lock_el_update(getUpdateElMutex(existingInternalId));
                 if (isMarkedDeleted(existingInternalId)) {
                     throw std::runtime_error("Can't use addPoint to update deleted elements if replacement of deleted elements is enabled.");
                 }
             }
             lock_table.unlock();

-            std::unique_lock <std::mutex> lock_el_update(getUpdateElMutex(existingInternalId));
-
             if (isMarkedDeleted(existingInternalId)) {
                 unmarkDeletedInternal(existingInternalId);
             }

@@ -1108,8 +1090,6 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
             label_lookup_[label] = cur_c;
         }

-        // Take update lock to prevent race conditions on an element with insertion/update at the same time.
-        std::unique_lock <std::mutex> lock_el_update(getUpdateElMutex(cur_c));
         std::unique_lock <std::mutex> lock_el(link_list_locks_[cur_c]);
         int curlevel = getRandomLevel(mult_);
         if (level > 0)
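The locking scheme this commit retains is lock striping: instead of one mutex per element, a fixed power-of-two pool of mutexes is indexed by masking the label with (pool size - 1), so any two operations on the same label always take the same mutex while memory use stays constant. Because getLabelOpMutex already serializes concurrent insert/update of a given element, the element_update_locks_ pool (striped by internal id) duplicated that protection and could be dropped. Below is a minimal stand-alone C++ sketch of the pattern; the LabelLockPool wrapper is hypothetical, with names borrowed from the code above.

#include <cstddef>
#include <mutex>
#include <vector>

// Hypothetical illustration of the striped-lock pattern behind
// getLabelOpMutex; this wrapper class is not part of hnswlib.
class LabelLockPool {
 public:
    // Power of two, so masking with (size - 1) is a cheap modulo.
    static const size_t MAX_LABEL_OPERATION_LOCKS = 65536;

    LabelLockPool() : locks_(MAX_LABEL_OPERATION_LOCKS) {}

    // Map a label to one of the pooled mutexes. Distinct labels may
    // share a stripe (a collision): extra contention, but never a
    // correctness problem.
    std::mutex& getLabelOpMutex(size_t label) const {
        size_t lock_id = label & (MAX_LABEL_OPERATION_LOCKS - 1);
        return locks_[lock_id];
    }

 private:
    mutable std::vector<std::mutex> locks_;
};

// Usage: hold the stripe for the whole operation on that label, e.g.
//   LabelLockPool pool;
//   std::unique_lock<std::mutex> guard(pool.getLabelOpMutex(label));

With 65536 stripes, collisions between concurrently touched labels are rare, and the pool's footprint stays fixed no matter how many elements the index holds.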
python_bindings/tests/bindings_test_recall.py (2 changes: 1 addition & 1 deletion)

@@ -40,7 +40,7 @@ def testRandomSelf(self):

         # Set number of threads used during batch search/construction in hnsw
         # By default using all available cores
-        hnsw_index.set_num_threads(1)
+        hnsw_index.set_num_threads(4)

         print("Adding batch of %d elements" % (len(data)))
         hnsw_index.add_items(data)
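For context, the feature this PR implements ("Replace deleted elements at addition") is controlled by the replace_deleted constructor flag visible in the hnswalg.h diff above. The following rough sketch shows how it might be exercised from C++; the addPoint replace_deleted parameter and the markDelete call are assumptions based on this branch, not a definitive API reference.

#include <vector>
#include "hnswlib/hnswlib.h"

int main() {
    const int dim = 16;
    hnswlib::L2Space space(dim);

    // Enable replacement of deleted elements at construction time
    // (the replace_deleted flag from the diff above).
    hnswlib::HierarchicalNSW<float> index(
        &space, /*max_elements=*/1000, /*M=*/16, /*ef_construction=*/200,
        /*random_seed=*/100, /*replace_deleted=*/true);

    std::vector<float> a(dim, 0.5f);
    index.addPoint(a.data(), /*label=*/0);

    // Free the slot: the element is only marked, not physically removed.
    index.markDelete(0);

    // Assumed addPoint overload: with replace_deleted=true the new point
    // reuses the deleted element's slot instead of growing the index.
    std::vector<float> b(dim, 0.25f);
    index.addPoint(b.data(), /*label=*/1, /*replace_deleted=*/true);
    return 0;
}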