-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit d76eaa2
Showing
17 changed files
with
5,776 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#pragma once | ||
|
||
#include <algorithm> | ||
#include <functional> | ||
#include <queue> | ||
|
||
|
||
namespace hnsw { namespace detail { | ||
|
||
|
||
/**
 * Comparison functor for pair-like search results.
 *
 * Returns true when `l.second` compares less than `r.second`. Works with any
 * type T that exposes a `second` member supporting `operator<`
 * (presumably `second` holds a distance -- confirm against the callers).
 */
struct search_result_closer_t {
    template<class T>
    constexpr bool operator()(const T &l, const T &r) const {
        return l.second < r.second;
    }
};
|
||
|
||
/**
 * Comparison functor for pair-like search results.
 *
 * Returns true when `l.second` compares greater than `r.second`. Works with
 * any type T that exposes a `second` member supporting `operator>`
 * (presumably `second` holds a distance -- confirm against the callers).
 */
struct search_result_further_t {
    template<class T>
    constexpr bool operator()(const T &l, const T &r) const {
        return l.second > r.second;
    }
};
|
||
|
||
/**
 * Thin adaptor that publicly derives from `Base` and re-exports its member
 * `c` with public access.
 *
 * When `Base` is a `std::priority_queue`, `c` is the protected underlying
 * container, so this wrapper lets callers reach the stored elements
 * directly. Mutating `c` bypasses the queue's own bookkeeping; keeping the
 * container in a valid heap state is then the caller's job.
 *
 * @tparam Base queue type to extend; must have an accessible member `c`.
 */
template<class Base>
class priority_queue : public Base {
public:
    /// The wrapped queue type.
    using base_type = Base;

    /// Promote the base class's `c` member to public visibility.
    using Base::c;
};
|
||
|
||
}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#pragma once | ||
|
||
#include "dot_product_avx.hpp" | ||
#include "dot_product_sse2.hpp" | ||
|
||
|
||
namespace hnsw { namespace detail { | ||
|
||
|
||
/**
 * Generic scalar dot product.
 *
 * Accumulates `one[i] * another[i]` for i in [0, size) in index order,
 * starting from `T(0)`.
 *
 * @param one      pointer to the first operand; must be readable for `size` elements.
 * @param another  pointer to the second operand; must be readable for `size` elements.
 * @param size     number of elements to combine; 0 yields `T(0)`.
 * @return the accumulated sum of element-wise products.
 */
template<class T>
T dot_product(const T *one, const T *another, std::size_t size) {
    T sum = 0;

    for (std::size_t i = 0; i < size; ++i) {
        sum += one[i] * another[i];
    }

    return sum;
}
|
||
|
||
#if defined(HNSW_HAVE_AVX) | ||
|
||
float dot_product(const float *pVect1, const float *pVect2, std::size_t qty) { | ||
return dot_product_avx(pVect1, pVect2, qty); | ||
} | ||
|
||
#elif defined(HNSW_HAVE_SSE2) | ||
|
||
float dot_product(const float *pVect1, const float *pVect2, std::size_t qty) { | ||
return dot_product_sse2(pVect1, pVect2, qty); | ||
} | ||
|
||
#endif | ||
|
||
|
||
#if defined(HNSW_HAVE_SSE2) | ||
|
||
double dot_product(const double *pVect1, const double *pVect2, std::size_t qty) { | ||
return dot_product_sse2(pVect1, pVect2, qty); | ||
} | ||
|
||
#endif | ||
|
||
|
||
}} | ||
|
||
|
||
#ifdef HNSW_HAVE_AVX | ||
#undef HNSW_HAVE_AVX | ||
#endif | ||
|
||
#ifdef HNSW_HAVE_SSE2 | ||
#undef HNSW_HAVE_SSE2 | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/** | ||
* This file contains code from the Non-metric Space Library | ||
* | ||
* Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov (http://boytsov.info). | ||
* With contributions from Lawrence Cayton (http://lcayton.com/) and others. | ||
* | ||
* For the complete list of contributors and further details see: | ||
* https://github.com/searchivarius/NonMetricSpaceLib | ||
* | ||
* Copyright (c) 2014 | ||
* | ||
* This code is released under the | ||
* Apache License Version 2.0 http://www.apache.org/licenses/. | ||
* | ||
*/ | ||
|
||
#pragma once | ||
|
||
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) | ||
|
||
#include <x86intrin.h> | ||
|
||
#ifdef __AVX__ | ||
|
||
#define HNSW_HAVE_AVX | ||
|
||
namespace hnsw { namespace detail { | ||
|
||
|
||
/**
 * AVX dot product of two float arrays.
 *
 * Works in three phases over the same pair of advancing pointers:
 *   1. blocks of 16 floats, as two 8-wide unaligned loads per operand,
 *      accumulated into one 256-bit register;
 *   2. remaining blocks of 4 floats, accumulated into a 128-bit register
 *      seeded with the folded halves of the 256-bit accumulator;
 *   3. the last (qty % 4) elements with scalar arithmetic.
 *
 * Partial sums are kept per SIMD lane, so the summation order differs from a
 * plain left-to-right loop and the result can differ from it in the low bits.
 *
 * Declared `inline` because it is defined in a header: without it, including
 * this header from more than one translation unit produces
 * multiple-definition link errors.
 *
 * @param pVect1  first operand, `qty` floats; no alignment requirement.
 * @param pVect2  second operand, `qty` floats; no alignment requirement.
 * @param qty     number of elements; 0 yields 0.
 * @return the sum of element-wise products.
 */
inline float dot_product_avx(const float *pVect1, const float *pVect2, std::size_t qty) {
    static_assert(sizeof(float) == 4, "Cannot use SIMD instructions with non-32-bit floats.");

    const std::size_t qty16 = qty / 16;
    const std::size_t qty4 = qty / 4;

    // All three end markers are measured from the start of pVect1; pVect2 is
    // advanced in lock-step and never compared.
    const float *pEnd1 = pVect1 + 16 * qty16;
    const float *pEnd2 = pVect1 + 4 * qty4;
    const float *pEnd3 = pVect1 + qty;

    __m256 sum256 = _mm256_set1_ps(0);

    // Phase 1: 16 floats per iteration (2 x 8 lanes).
    while (pVect1 < pEnd1) {
        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);

        __m256 v1 = _mm256_loadu_ps(pVect1); pVect1 += 8;
        __m256 v2 = _mm256_loadu_ps(pVect2); pVect2 += 8;
        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));

        v1 = _mm256_loadu_ps(pVect1); pVect1 += 8;
        v2 = _mm256_loadu_ps(pVect2); pVect2 += 8;
        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
    }

    // Fold the upper and lower 128-bit halves so the 4-wide phase can keep
    // accumulating into a single register.
    __m128 v1, v2;
    __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0),
                                 _mm256_extractf128_ps(sum256, 1));

    // Phase 2: 4 floats per iteration.
    while (pVect1 < pEnd2) {
        v1 = _mm_loadu_ps(pVect1); pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2); pVect2 += 4;
        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
    }

    // _mm_store_ps needs a 16-byte aligned destination; 32 satisfies that.
    alignas(32) float TmpRes[4];
    _mm_store_ps(TmpRes, sum_prod);
    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];

    // Phase 3: scalar tail.
    while (pVect1 < pEnd3) {
        sum += (*pVect1) * (*pVect2);
        ++pVect1; ++pVect2;
    }

    return sum;
}
|
||
|
||
}} // namespace hnsw::detail | ||
|
||
#endif // __AVX__ | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
/** | ||
* This file contains code from the Non-metric Space Library | ||
* | ||
* Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov (http://boytsov.info). | ||
* With contributions from Lawrence Cayton (http://lcayton.com/) and others. | ||
* | ||
* For the complete list of contributors and further details see: | ||
* https://github.com/searchivarius/NonMetricSpaceLib | ||
* | ||
* Copyright (c) 2014 | ||
* | ||
* This code is released under the | ||
* Apache License Version 2.0 http://www.apache.org/licenses/. | ||
* | ||
*/ | ||
|
||
#pragma once | ||
|
||
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) | ||
|
||
#include <x86intrin.h> | ||
|
||
#ifdef __SSE2__ | ||
|
||
#define HNSW_HAVE_SSE2 | ||
|
||
namespace hnsw { namespace detail { | ||
|
||
|
||
/**
 * SSE dot product of two float arrays.
 *
 * Works in three phases over the same pair of advancing pointers:
 *   1. blocks of 16 floats, as four manually unrolled 4-wide steps;
 *   2. remaining blocks of 4 floats;
 *   3. the last (qty % 4) elements with scalar arithmetic.
 *
 * Partial sums are kept per SIMD lane, so the summation order differs from a
 * plain left-to-right loop and the result can differ from it in the low bits.
 *
 * Declared `inline` because it is defined in a header: without it, including
 * this header from more than one translation unit produces
 * multiple-definition link errors.
 *
 * @param pVect1  first operand, `qty` floats; no alignment requirement.
 * @param pVect2  second operand, `qty` floats; no alignment requirement.
 * @param qty     number of elements; 0 yields 0.
 * @return the sum of element-wise products.
 */
inline float dot_product_sse2(const float *pVect1, const float *pVect2, std::size_t qty) {
    static_assert(sizeof(float) == 4, "Cannot use SIMD instructions with non-32-bit floats.");

    const std::size_t qty16 = qty / 16;
    const std::size_t qty4 = qty / 4;

    // All three end markers are measured from the start of pVect1; pVect2 is
    // advanced in lock-step and never compared.
    const float *pEnd1 = pVect1 + 16 * qty16;
    const float *pEnd2 = pVect1 + 4 * qty4;
    const float *pEnd3 = pVect1 + qty;

    __m128 v1, v2;
    __m128 sum = _mm_set1_ps(0);

    // Phase 1: 16 floats per iteration (4 x 4 lanes, unrolled by hand).
    while (pVect1 < pEnd1) {
        v1 = _mm_loadu_ps(pVect1); pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2); pVect2 += 4;
        sum = _mm_add_ps(sum, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1); pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2); pVect2 += 4;
        sum = _mm_add_ps(sum, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1); pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2); pVect2 += 4;
        sum = _mm_add_ps(sum, _mm_mul_ps(v1, v2));

        v1 = _mm_loadu_ps(pVect1); pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2); pVect2 += 4;
        sum = _mm_add_ps(sum, _mm_mul_ps(v1, v2));
    }

    // Phase 2: 4 floats per iteration.
    while (pVect1 < pEnd2) {
        v1 = _mm_loadu_ps(pVect1); pVect1 += 4;
        v2 = _mm_loadu_ps(pVect2); pVect2 += 4;
        sum = _mm_add_ps(sum, _mm_mul_ps(v1, v2));
    }

    // _mm_store_ps needs a 16-byte aligned destination.
    alignas(16) float TmpRes[4];

    _mm_store_ps(TmpRes, sum);
    float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];

    // Phase 3: scalar tail.
    while (pVect1 < pEnd3) {
        res += (*pVect1) * (*pVect2);
        ++pVect1; ++pVect2;
    }

    return res;
}
|
||
|
||
/**
 * SSE2 dot product of two double arrays.
 *
 * Works in three phases over the same pair of advancing pointers:
 *   1. blocks of 8 doubles, as four manually unrolled 2-wide steps;
 *   2. remaining blocks of 2 doubles;
 *   3. the last (qty % 2) element with scalar arithmetic.
 *
 * Partial sums are kept per SIMD lane, so the summation order differs from a
 * plain left-to-right loop and the result can differ from it in the low bits.
 *
 * Declared `inline` because it is defined in a header: without it, including
 * this header from more than one translation unit produces
 * multiple-definition link errors.
 *
 * @param pVect1  first operand, `qty` doubles; no alignment requirement.
 * @param pVect2  second operand, `qty` doubles; no alignment requirement.
 * @param qty     number of elements; 0 yields 0.
 * @return the sum of element-wise products.
 */
inline double dot_product_sse2(const double *pVect1, const double *pVect2, std::size_t qty) {
    static_assert(sizeof(double) == 8, "Cannot use SIMD instructions with non-64-bit doubles.");

    const std::size_t qty8 = qty / 8;
    const std::size_t qty2 = qty / 2;

    // All three end markers are measured from the start of pVect1; pVect2 is
    // advanced in lock-step and never compared.
    const double *pEnd1 = pVect1 + 8 * qty8;
    const double *pEnd2 = pVect1 + 2 * qty2;
    const double *pEnd3 = pVect1 + qty;

    __m128d v1, v2;
    __m128d sum = _mm_set1_pd(0);

    // Phase 1: 8 doubles per iteration (4 x 2 lanes, unrolled by hand).
    while (pVect1 < pEnd1) {
        v1 = _mm_loadu_pd(pVect1); pVect1 += 2;
        v2 = _mm_loadu_pd(pVect2); pVect2 += 2;
        sum = _mm_add_pd(sum, _mm_mul_pd(v1, v2));

        v1 = _mm_loadu_pd(pVect1); pVect1 += 2;
        v2 = _mm_loadu_pd(pVect2); pVect2 += 2;
        sum = _mm_add_pd(sum, _mm_mul_pd(v1, v2));

        v1 = _mm_loadu_pd(pVect1); pVect1 += 2;
        v2 = _mm_loadu_pd(pVect2); pVect2 += 2;
        sum = _mm_add_pd(sum, _mm_mul_pd(v1, v2));

        v1 = _mm_loadu_pd(pVect1); pVect1 += 2;
        v2 = _mm_loadu_pd(pVect2); pVect2 += 2;
        sum = _mm_add_pd(sum, _mm_mul_pd(v1, v2));
    }

    // Phase 2: 2 doubles per iteration.
    while (pVect1 < pEnd2) {
        v1 = _mm_loadu_pd(pVect1); pVect1 += 2;
        v2 = _mm_loadu_pd(pVect2); pVect2 += 2;
        sum = _mm_add_pd(sum, _mm_mul_pd(v1, v2));
    }

    // _mm_store_pd needs a 16-byte aligned destination.
    alignas(16) double TmpRes[2];

    _mm_store_pd(TmpRes, sum);
    double res = TmpRes[0] + TmpRes[1];

    // Phase 3: scalar tail (at most one element).
    while (pVect1 < pEnd3) {
        res += (*pVect1) * (*pVect2);
        ++pVect1; ++pVect2;
    }

    return res;
}
|
||
|
||
}} // namespace hnsw::detail | ||
|
||
#endif // __SSE2__ | ||
#endif |
Oops, something went wrong.