Skip to content

Commit

Permalink
Merge pull request #371 from nmslib/develop
Browse files Browse the repository at this point in the history
Merge 0.6.2 to master
  • Loading branch information
yurymalkov authored Feb 14, 2022
2 parents 21e20f3 + 9d933ac commit 7cc0ecb
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 36 deletions.
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ Header-only C++ HNSW implementation with python bindings.
**NEWS:**


**version 0.6.2**

* Fixed a bug in saving large pickles: pickles larger than 4GB could have been corrupted. Thanks to Kai Wohlfahrt for reporting.
* Thanks to ([@GuyAv46](https://github.com/GuyAv46)) hnswlib inner product is now more consistent across architectures (SSE, AVX, etc.).
*

**version 0.6.1**

* Thanks to ([@tony-kuo](https://github.com/tony-kuo)) hnswlib AVX512 and AVX builds are now backwards-compatible with older SSE and non-AVX512 architectures.
Expand Down Expand Up @@ -235,6 +241,9 @@ or you can install via pip:


### For developers
Contributions are highly welcome!

Please make pull requests against the `develop` branch.

When making changes please run tests (and please add a test to `python_bindings/tests` in case there is new functionality):
```bash
Expand All @@ -259,10 +268,6 @@ https://github.com/dbaranchuk/ivf-hnsw
* .Net implementation: https://github.com/microsoft/HNSW.Net
* CUDA implementation: https://github.com/js1010/cuhnsw
### Contributing to the repository
Contributions are highly welcome!
Please make pull requests against the `develop` branch.
### 200M SIFT test reproduction
To download and extract the bigann dataset (from root directory):
Expand Down
84 changes: 61 additions & 23 deletions hnswlib/space_ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@ namespace hnswlib {
for (unsigned i = 0; i < qty; i++) {
res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
}
return (1.0f - res);
return res;

}

static float
InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr) {
    // Forward the arguments unchanged to InnerProduct and return
    // 1.0f minus the value it produces.
    const float dot = InnerProduct(pVect1, pVect2, qty_ptr);
    return 1.0f - dot;
}

#if defined(USE_AVX)

// Favor using AVX if available.
Expand Down Expand Up @@ -61,8 +66,13 @@ namespace hnswlib {

_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];;
return 1.0f - sum;
}
return sum;
}

static float
InnerProductDistanceSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    // Forward the arguments unchanged to InnerProductSIMD4ExtAVX and return
    // 1.0f minus the value it produces.
    const float dot = InnerProductSIMD4ExtAVX(pVect1v, pVect2v, qty_ptr);
    return 1.0f - dot;
}

#endif

Expand Down Expand Up @@ -121,7 +131,12 @@ namespace hnswlib {
_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];

return 1.0f - sum;
return sum;
}

static float
InnerProductDistanceSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    // Forward the arguments unchanged to InnerProductSIMD4ExtSSE and return
    // 1.0f minus the value it produces.
    const float dot = InnerProductSIMD4ExtSSE(pVect1v, pVect2v, qty_ptr);
    return 1.0f - dot;
}

#endif
Expand Down Expand Up @@ -156,7 +171,12 @@ namespace hnswlib {
_mm512_store_ps(TmpRes, sum512);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15];

return 1.0f - sum;
return sum;
}

static float
InnerProductDistanceSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    // Forward the arguments unchanged to InnerProductSIMD16ExtAVX512 and return
    // 1.0f minus the value it produces.
    const float dot = InnerProductSIMD16ExtAVX512(pVect1v, pVect2v, qty_ptr);
    return 1.0f - dot;
}

#endif
Expand Down Expand Up @@ -196,15 +216,20 @@ namespace hnswlib {
_mm256_store_ps(TmpRes, sum256);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];

return 1.0f - sum;
return sum;
}

static float
InnerProductDistanceSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    // Forward the arguments unchanged to InnerProductSIMD16ExtAVX and return
    // 1.0f minus the value it produces.
    const float dot = InnerProductSIMD16ExtAVX(pVect1v, pVect2v, qty_ptr);
    return 1.0f - dot;
}

#endif

#if defined(USE_SSE)

static float
InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
static float
InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
float PORTABLE_ALIGN32 TmpRes[8];
float *pVect1 = (float *) pVect1v;
float *pVect2 = (float *) pVect2v;
Expand Down Expand Up @@ -245,17 +270,24 @@ namespace hnswlib {
_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];

return 1.0f - sum;
return sum;
}

static float
InnerProductDistanceSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
    // Forward the arguments unchanged to InnerProductSIMD16ExtSSE and return
    // 1.0f minus the value it produces.
    const float dot = InnerProductSIMD16ExtSSE(pVect1v, pVect2v, qty_ptr);
    return 1.0f - dot;
}

#endif

#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
DISTFUNC<float> InnerProductSIMD16Ext = InnerProductSIMD16ExtSSE;
DISTFUNC<float> InnerProductSIMD4Ext = InnerProductSIMD4ExtSSE;
DISTFUNC<float> InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtSSE;
DISTFUNC<float> InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtSSE;

static float
InnerProductSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
InnerProductDistanceSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
size_t qty = *((size_t *) qty_ptr);
size_t qty16 = qty >> 4 << 4;
float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16);
Expand All @@ -264,11 +296,11 @@ namespace hnswlib {

size_t qty_left = qty - qty16;
float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
return res + res_tail - 1.0f;
return 1.0f - (res + res_tail);
}

static float
InnerProductSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
InnerProductDistanceSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
size_t qty = *((size_t *) qty_ptr);
size_t qty4 = qty >> 2 << 2;

Expand All @@ -279,7 +311,7 @@ namespace hnswlib {
float *pVect2 = (float *) pVect2v + qty4;
float res_tail = InnerProduct(pVect1, pVect2, &qty_left);

return res + res_tail - 1.0f;
return 1.0f - (res + res_tail);
}
#endif

Expand All @@ -290,30 +322,37 @@ namespace hnswlib {
size_t dim_;
public:
InnerProductSpace(size_t dim) {
fstdistfunc_ = InnerProduct;
fstdistfunc_ = InnerProductDistance;
#if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
#if defined(USE_AVX512)
if (AVX512Capable())
if (AVX512Capable()) {
InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX512;
else if (AVXCapable())
InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX512;
} else if (AVXCapable()) {
InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
}
#elif defined(USE_AVX)
if (AVXCapable())
if (AVXCapable()) {
InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
}
#endif
#if defined(USE_AVX)
if (AVXCapable())
if (AVXCapable()) {
InnerProductSIMD4Ext = InnerProductSIMD4ExtAVX;
InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtAVX;
}
#endif

if (dim % 16 == 0)
fstdistfunc_ = InnerProductSIMD16Ext;
fstdistfunc_ = InnerProductDistanceSIMD16Ext;
else if (dim % 4 == 0)
fstdistfunc_ = InnerProductSIMD4Ext;
fstdistfunc_ = InnerProductDistanceSIMD4Ext;
else if (dim > 16)
fstdistfunc_ = InnerProductSIMD16ExtResiduals;
fstdistfunc_ = InnerProductDistanceSIMD16ExtResiduals;
else if (dim > 4)
fstdistfunc_ = InnerProductSIMD4ExtResiduals;
fstdistfunc_ = InnerProductDistanceSIMD4ExtResiduals;
#endif
dim_ = dim;
data_size_ = dim * sizeof(float);
Expand All @@ -334,5 +373,4 @@ namespace hnswlib {
~InnerProductSpace() {}
};


}
18 changes: 9 additions & 9 deletions python_bindings/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,12 @@ class Index {
py::dict getAnnData() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */
std::unique_lock <std::mutex> templock(appr_alg->global);

unsigned int level0_npy_size = appr_alg->cur_element_count * appr_alg->size_data_per_element_;
unsigned int link_npy_size = 0;
std::vector<unsigned int> link_npy_offsets(appr_alg->cur_element_count);
size_t level0_npy_size = appr_alg->cur_element_count * appr_alg->size_data_per_element_;
size_t link_npy_size = 0;
std::vector<size_t> link_npy_offsets(appr_alg->cur_element_count);

for (size_t i = 0; i < appr_alg->cur_element_count; i++){
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
link_npy_offsets[i]=link_npy_size;
if (linkListSize)
link_npy_size += linkListSize;
Expand Down Expand Up @@ -326,7 +326,7 @@ class Index {
memcpy(element_levels_npy, appr_alg->element_levels_.data(), appr_alg->element_levels_.size() * sizeof(int));

for (size_t i = 0; i < appr_alg->cur_element_count; i++){
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
if (linkListSize){
memcpy(link_list_npy+link_npy_offsets[i], appr_alg->linkLists_[i], linkListSize);
}
Expand Down Expand Up @@ -500,11 +500,11 @@ class Index {

memcpy(appr_alg->element_levels_.data(), element_levels_npy.data(), element_levels_npy.nbytes());

unsigned int link_npy_size = 0;
std::vector<unsigned int> link_npy_offsets(appr_alg->cur_element_count);
size_t link_npy_size = 0;
std::vector<size_t> link_npy_offsets(appr_alg->cur_element_count);

for (size_t i = 0; i < appr_alg->cur_element_count; i++){
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
link_npy_offsets[i]=link_npy_size;
if (linkListSize)
link_npy_size += linkListSize;
Expand All @@ -513,7 +513,7 @@ class Index {
memcpy(appr_alg->data_level0_memory_, data_level0_npy.data(), data_level0_npy.nbytes());

for (size_t i = 0; i < appr_alg->max_elements_; i++) {
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
if (linkListSize == 0) {
appr_alg->linkLists_[i] = nullptr;
} else {
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import sys
import platform

import numpy as np
import pybind11
Expand Down Expand Up @@ -86,6 +87,8 @@ class BuildExt(build_ext):
}

if sys.platform == 'darwin':
if platform.machine() == 'arm64':
c_opts['unix'].remove('-march=native')
c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7']
link_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7']
else:
Expand Down

0 comments on commit 7cc0ecb

Please sign in to comment.