Initial commit.

andrusha97 · Oct 17, 2017 · d76eaa2 · d76eaa2
commit d76eaa2
Show file tree

Hide file tree

Showing 17 changed files with 5,776 additions and 0 deletions.
diff --git a/include/hnsw/detail/detail.hpp b/include/hnsw/detail/detail.hpp
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <algorithm>
+#include <functional>
+#include <queue>
+
+
+namespace hnsw { namespace detail {
+
+
+// Comparison functor: yields true when the `second` member of the left
+// operand compares less than the `second` member of the right operand.
+// Works with any type exposing a `second` member (e.g. std::pair).
+// NOTE(review): `second` presumably holds a distance - confirm against callers.
+struct search_result_closer_t {
+    template<class Result>
+    bool operator()(const Result &l, const Result &r) const {
+        const bool left_is_smaller = l.second < r.second;
+        return left_is_smaller;
+    }
+};
+
+
+// Comparison functor: yields true when the `second` member of the left
+// operand compares greater than the `second` member of the right operand.
+// Works with any type exposing a `second` member (e.g. std::pair).
+// NOTE(review): `second` presumably holds a distance - confirm against callers.
+struct search_result_further_t {
+    template<class Result>
+    bool operator()(const Result &l, const Result &r) const {
+        const bool left_is_larger = l.second > r.second;
+        return left_is_larger;
+    }
+};
+
+
+// Adaptor that publicly derives from `Base` and re-exports Base's member `c`
+// into the public interface, so the underlying container can be accessed
+// directly. `base_type` names the wrapped type.
+// NOTE(review): Base is presumably a std::priority_queue specialization
+// (whose container member `c` is protected) - confirm against callers.
+template<class Base>
+class priority_queue : public Base {
+public:
+    typedef Base base_type;
+
+    // Lift the container member of Base to public visibility.
+    using Base::c;
+};
+
+
+}}
diff --git a/include/hnsw/detail/dot_product.hpp b/include/hnsw/detail/dot_product.hpp
@@ -0,0 +1,55 @@
+#pragma once
+
+#include "dot_product_avx.hpp"
+#include "dot_product_sse2.hpp"
+
+
+namespace hnsw { namespace detail {
+
+
+// Generic scalar dot product: accumulates one[i] * another[i] for every
+// i in [0, size), in index order, starting from T initialised with 0.
+// Returns that initial value unchanged when size is 0.
+template<class T>
+T dot_product(const T *one, const T *another, std::size_t size) {
+    T acc = 0;
+
+    std::size_t idx = 0;
+    while (idx < size) {
+        acc += one[idx] * another[idx];
+        ++idx;
+    }
+
+    return acc;
+}
+
+
+#if defined(HNSW_HAVE_AVX)
+
+// Non-template overload for float arrays, compiled when HNSW_HAVE_AVX is
+// defined; forwards to dot_product_avx.
+// Marked `inline` because this is a function definition in a header: without
+// it, including the header from several translation units produces multiple
+// definitions of the same symbol at link time.
+inline float dot_product(const float *pVect1, const float *pVect2, std::size_t qty) {
+    return dot_product_avx(pVect1, pVect2, qty);
+}
+
+#elif defined(HNSW_HAVE_SSE2)
+
+// Non-template overload for float arrays, compiled when HNSW_HAVE_AVX is not
+// defined but HNSW_HAVE_SSE2 is; forwards to dot_product_sse2.
+// Marked `inline` because this is a function definition in a header: without
+// it, including the header from several translation units produces multiple
+// definitions of the same symbol at link time.
+inline float dot_product(const float *pVect1, const float *pVect2, std::size_t qty) {
+    return dot_product_sse2(pVect1, pVect2, qty);
+}
+
+#endif
+
+
+#if defined(HNSW_HAVE_SSE2)
+
+// Non-template overload for double arrays, compiled when HNSW_HAVE_SSE2 is
+// defined; forwards to the double overload of dot_product_sse2.
+// Marked `inline` because this is a function definition in a header: without
+// it, including the header from several translation units produces multiple
+// definitions of the same symbol at link time.
+inline double dot_product(const double *pVect1, const double *pVect2, std::size_t qty) {
+    return dot_product_sse2(pVect1, pVect2, qty);
+}
+
+#endif
+
+
+}}
+
+
+#ifdef HNSW_HAVE_AVX
+#undef HNSW_HAVE_AVX
+#endif
+
+#ifdef HNSW_HAVE_SSE2
+#undef HNSW_HAVE_SSE2
+#endif
diff --git a/include/hnsw/detail/dot_product_avx.hpp b/include/hnsw/detail/dot_product_avx.hpp
@@ -0,0 +1,80 @@
+/**
+ * This file contains code from the Non-metric Space Library
+ *
+ * Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov (http://boytsov.info).
+ * With contributions from Lawrence Cayton (http://lcayton.com/) and others.
+ *
+ * For the complete list of contributors and further details see:
+ * https://github.com/searchivarius/NonMetricSpaceLib
+ *
+ * Copyright (c) 2014
+ *
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ */
+
+#pragma once
+
+#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+
+#include <x86intrin.h>
+
+#ifdef __AVX__
+
+#define HNSW_HAVE_AVX
+
+namespace hnsw { namespace detail {
+
+
+// Dot product of two float arrays of length `qty`, computed with AVX.
+//
+// The input is consumed in three phases:
+//   1. 16 floats per iteration (two unaligned 256-bit loads per operand),
+//   2. 4 floats per iteration (one unaligned 128-bit load per operand),
+//   3. one float at a time for the remaining tail (at most 3 elements).
+// Only unaligned loads are used, so the inputs need no particular alignment.
+// Partial sums are kept per SIMD lane and combined at the end, so rounding
+// may differ from a plain left-to-right scalar accumulation.
+//
+// Marked `inline` because this is a function definition in a header: without
+// it, including the header from several translation units produces multiple
+// definitions of the same symbol at link time.
+inline float dot_product_avx(const float *pVect1, const float *pVect2, std::size_t qty) {
+    static_assert(sizeof(float) == 4, "Cannot use SIMD instructions with non-32-bit floats.");
+
+    std::size_t qty16 = qty / 16;
+    std::size_t qty4 = qty / 4;
+
+    // End markers for the three phases, all relative to the start of pVect1.
+    const float* pEnd1 = pVect1 + 16 * qty16;
+    const float* pEnd2 = pVect1 + 4 * qty4;
+    const float* pEnd3 = pVect1 + qty;
+
+    __m256  sum256 = _mm256_set1_ps(0);
+
+    // Phase 1: two 8-float multiply-accumulate steps per iteration.
+    while (pVect1 < pEnd1) {
+        //_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);
+
+        __m256 v1 = _mm256_loadu_ps(pVect1); pVect1 += 8;
+        __m256 v2 = _mm256_loadu_ps(pVect2); pVect2 += 8;
+        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
+
+        v1 = _mm256_loadu_ps(pVect1); pVect1 += 8;
+        v2 = _mm256_loadu_ps(pVect2); pVect2 += 8;
+        sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
+    }
+
+    // Fold the 256-bit accumulator into 128 bits by adding its two halves.
+    __m128  v1, v2;
+    __m128  sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0),
+                                  _mm256_extractf128_ps(sum256, 1));
+
+    // Phase 2: remaining full groups of 4 floats.
+    while (pVect1 < pEnd2) {
+        v1 = _mm_loadu_ps(pVect1); pVect1 += 4;
+        v2 = _mm_loadu_ps(pVect2); pVect2 += 4;
+        sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
+    }
+
+    // Horizontal sum of the four lanes; _mm_store_ps needs an aligned target.
+    alignas(32) float TmpRes[4];
+    _mm_store_ps(TmpRes, sum_prod);
+    float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
+
+    // Phase 3: scalar tail.
+    while (pVect1 < pEnd3) {
+        sum += (*pVect1) * (*pVect2);
+        ++pVect1; ++pVect2;
+    }
+
+    return sum;
+}
+
+
+}} // namespace hnsw::detail
+
+#endif // __AVX__
+#endif
diff --git a/include/hnsw/detail/dot_product_sse2.hpp b/include/hnsw/detail/dot_product_sse2.hpp
@@ -0,0 +1,135 @@
+/**
+ * This file contains code from the Non-metric Space Library
+ *
+ * Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov (http://boytsov.info).
+ * With contributions from Lawrence Cayton (http://lcayton.com/) and others.
+ *
+ * For the complete list of contributors and further details see:
+ * https://github.com/searchivarius/NonMetricSpaceLib
+ *
+ * Copyright (c) 2014
+ *
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ */
+
+#pragma once
+
+#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+
+#include <x86intrin.h>
+
+#ifdef __SSE2__
+
+#define HNSW_HAVE_SSE2
+
+namespace hnsw { namespace detail {
+
+
+// Dot product of two float arrays of length `qty`, computed with SSE.
+//
+// The input is consumed in three phases:
+//   1. 16 floats per iteration (four unaligned 128-bit loads per operand),
+//   2. 4 floats per iteration (one unaligned 128-bit load per operand),
+//   3. one float at a time for the remaining tail (at most 3 elements).
+// Only unaligned loads are used, so the inputs need no particular alignment.
+// Partial sums are kept per SIMD lane and combined at the end, so rounding
+// may differ from a plain left-to-right scalar accumulation.
+//
+// Marked `inline` because this is a function definition in a header: without
+// it, including the header from several translation units produces multiple
+// definitions of the same symbol at link time.
+inline float dot_product_sse2(const float *pVect1, const float *pVect2, std::size_t qty) {
+    static_assert(sizeof(float) == 4, "Cannot use SIMD instructions with non-32-bit floats.");
+
+    std::size_t qty16  = qty/16;
+    std::size_t qty4  = qty/4;
+
+    // End markers for the three phases, all relative to the start of pVect1.
+    const float* pEnd1 = pVect1 + 16  * qty16;
+    const float* pEnd2 = pVect1 + 4  * qty4;
+    const float* pEnd3 = pVect1 + qty;
+
+    __m128  v1, v2;
+    __m128  sum = _mm_set1_ps(0);
+
+    // Phase 1: four 4-float multiply-accumulate steps per iteration.
+    while (pVect1 < pEnd1) {
+        v1   = _mm_loadu_ps(pVect1); pVect1 += 4;
+        v2   = _mm_loadu_ps(pVect2); pVect2 += 4;
+        sum  = _mm_add_ps(sum, _mm_mul_ps(v1, v2));
+
+        v1   = _mm_loadu_ps(pVect1); pVect1 += 4;
+        v2   = _mm_loadu_ps(pVect2); pVect2 += 4;
+        sum  = _mm_add_ps(sum, _mm_mul_ps(v1, v2));
+
+        v1   = _mm_loadu_ps(pVect1); pVect1 += 4;
+        v2   = _mm_loadu_ps(pVect2); pVect2 += 4;
+        sum  = _mm_add_ps(sum, _mm_mul_ps(v1, v2));
+
+        v1   = _mm_loadu_ps(pVect1); pVect1 += 4;
+        v2   = _mm_loadu_ps(pVect2); pVect2 += 4;
+        sum  = _mm_add_ps(sum, _mm_mul_ps(v1, v2));
+    }
+
+    // Phase 2: remaining full groups of 4 floats.
+    while (pVect1 < pEnd2) {
+        v1   = _mm_loadu_ps(pVect1); pVect1 += 4;
+        v2   = _mm_loadu_ps(pVect2); pVect2 += 4;
+        sum  = _mm_add_ps(sum, _mm_mul_ps(v1, v2));
+    }
+
+    // Horizontal sum of the four lanes; _mm_store_ps needs an aligned target.
+    alignas(16) float TmpRes[4];
+
+    _mm_store_ps(TmpRes, sum);
+    float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
+
+    // Phase 3: scalar tail.
+    while (pVect1 < pEnd3) {
+        res += (*pVect1) * (*pVect2);
+        ++pVect1; ++pVect2;
+    }
+
+    return res;
+}
+
+
+// Dot product of two double arrays of length `qty`, computed with SSE2.
+//
+// The input is consumed in three phases:
+//   1. 8 doubles per iteration (four unaligned 128-bit loads per operand),
+//   2. 2 doubles per iteration (one unaligned 128-bit load per operand),
+//   3. a scalar step for the remaining tail (at most 1 element).
+// Only unaligned loads are used, so the inputs need no particular alignment.
+// Partial sums are kept per SIMD lane and combined at the end, so rounding
+// may differ from a plain left-to-right scalar accumulation.
+//
+// Marked `inline` because this is a function definition in a header: without
+// it, including the header from several translation units produces multiple
+// definitions of the same symbol at link time.
+inline double dot_product_sse2(const double *pVect1, const double *pVect2, std::size_t qty) {
+    static_assert(sizeof(double) == 8, "Cannot use SIMD instructions with non-64-bit doubles.");
+
+    std::size_t qty8 = qty/8;
+    std::size_t qty2 = qty/2;
+
+    // End markers for the three phases, all relative to the start of pVect1.
+    const double* pEnd1 = pVect1 + 8 * qty8;
+    const double* pEnd2 = pVect1 + 2 * qty2;
+    const double* pEnd3 = pVect1 + qty;
+
+    __m128d  v1, v2;
+    __m128d  sum = _mm_set1_pd(0);
+
+    // Phase 1: four 2-double multiply-accumulate steps per iteration.
+    while (pVect1 < pEnd1) {
+        v1   = _mm_loadu_pd(pVect1); pVect1 += 2;
+        v2   = _mm_loadu_pd(pVect2); pVect2 += 2;
+        sum  = _mm_add_pd(sum, _mm_mul_pd(v1, v2));
+
+        v1   = _mm_loadu_pd(pVect1); pVect1 += 2;
+        v2   = _mm_loadu_pd(pVect2); pVect2 += 2;
+        sum  = _mm_add_pd(sum, _mm_mul_pd(v1, v2));
+
+        v1   = _mm_loadu_pd(pVect1); pVect1 += 2;
+        v2   = _mm_loadu_pd(pVect2); pVect2 += 2;
+        sum  = _mm_add_pd(sum, _mm_mul_pd(v1, v2));
+
+        v1   = _mm_loadu_pd(pVect1); pVect1 += 2;
+        v2   = _mm_loadu_pd(pVect2); pVect2 += 2;
+        sum  = _mm_add_pd(sum, _mm_mul_pd(v1, v2));
+    }
+
+    // Phase 2: remaining full pairs of doubles.
+    while (pVect1 < pEnd2) {
+        v1   = _mm_loadu_pd(pVect1); pVect1 += 2;
+        v2   = _mm_loadu_pd(pVect2); pVect2 += 2;
+        sum  = _mm_add_pd(sum, _mm_mul_pd(v1, v2));
+    }
+
+    // Horizontal sum of the two lanes; _mm_store_pd needs an aligned target.
+    alignas(16) double TmpRes[2];
+
+    _mm_store_pd(TmpRes, sum);
+    double res= TmpRes[0] + TmpRes[1];
+
+    // Phase 3: scalar tail.
+    while (pVect1 < pEnd3) {
+        res += (*pVect1) * (*pVect2);
+        ++pVect1; ++pVect2;
+    }
+
+    return res;
+}
+
+
+}} // namespace hnsw::detail
+
+#endif // __SSE2__
+#endif