Skip to content

Commit

Permalink
Initial commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
andrusha97 committed Oct 17, 2017
0 parents commit d76eaa2
Show file tree
Hide file tree
Showing 17 changed files with 5,776 additions and 0 deletions.
35 changes: 35 additions & 0 deletions include/hnsw/detail/detail.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#pragma once

#include <algorithm>
#include <functional>
#include <queue>


namespace hnsw { namespace detail {


/**
 * Comparison functor for pair-like values.
 *
 * Yields true when the `second` member of the left operand compares less
 * than the `second` member of the right operand. Both operands must have
 * the same type. (Presumably `second` holds a distance, so "less" means
 * "closer" -- confirm against callers.)
 */
struct search_result_closer_t {
    template<typename Result>
    bool operator()(const Result &lhs, const Result &rhs) const {
        const bool lhs_is_smaller = lhs.second < rhs.second;
        return lhs_is_smaller;
    }
};


/**
 * Comparison functor for pair-like values.
 *
 * Yields true when the `second` member of the left operand compares greater
 * than the `second` member of the right operand. Both operands must have
 * the same type. (Presumably `second` holds a distance, so "greater" means
 * "further" -- confirm against callers.)
 */
struct search_result_further_t {
    template<typename Result>
    bool operator()(const Result &lhs, const Result &rhs) const {
        const bool lhs_is_larger = lhs.second > rhs.second;
        return lhs_is_larger;
    }
};


/**
 * Thin wrapper that publicly derives from `Base` and re-exports the
 * inherited member `c` with public access, so code holding this type can
 * reach it directly.
 *
 * NOTE(review): presumably `Base` is a std::priority_queue instantiation,
 * in which case `c` is its underlying container -- confirm against callers.
 */
template<typename Base>
class priority_queue : public Base {
public:
    // Make the inherited member `c` accessible to users of this class.
    using Base::c;

    // The wrapped type, for generic code that needs to name it.
    typedef Base base_type;
};


}}
55 changes: 55 additions & 0 deletions include/hnsw/detail/dot_product.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#pragma once

#include "dot_product_avx.hpp"
#include "dot_product_sse2.hpp"


namespace hnsw { namespace detail {


/**
 * Generic (scalar) dot product.
 *
 * Multiplies the first `size` elements of `one` and `another` pairwise, in
 * index order, and accumulates the products into a value of type T that
 * starts at 0.
 *
 * @param one      first input array, at least `size` elements
 * @param another  second input array, at least `size` elements
 * @param size     number of element pairs to process
 * @return the accumulated sum; 0 when `size` is 0
 */
template<class T>
T dot_product(const T *one, const T *another, std::size_t size) {
    T acc = 0;

    const T *lhs = one;
    const T *rhs = another;
    const T *const lhs_end = one + size;

    while (lhs != lhs_end) {
        acc += (*lhs) * (*rhs);
        ++lhs;
        ++rhs;
    }

    return acc;
}


#if defined(HNSW_HAVE_AVX)

/**
 * Non-template overload of dot_product for float arrays; forwards to
 * dot_product_avx.
 *
 * Declared `inline` because this is a non-template function defined in a
 * header: without it, including the header from more than one translation
 * unit produces multiple definitions at link time.
 *
 * @param pVect1  first input array, at least `qty` elements
 * @param pVect2  second input array, at least `qty` elements
 * @param qty     number of element pairs to process
 * @return whatever dot_product_avx returns for the same arguments
 */
inline float dot_product(const float *pVect1, const float *pVect2, std::size_t qty) {
    return dot_product_avx(pVect1, pVect2, qty);
}

#elif defined(HNSW_HAVE_SSE2)

/**
 * Non-template overload of dot_product for float arrays; forwards to the
 * float overload of dot_product_sse2.
 *
 * Declared `inline` because this is a non-template function defined in a
 * header: without it, including the header from more than one translation
 * unit produces multiple definitions at link time.
 *
 * @param pVect1  first input array, at least `qty` elements
 * @param pVect2  second input array, at least `qty` elements
 * @param qty     number of element pairs to process
 * @return whatever dot_product_sse2 returns for the same arguments
 */
inline float dot_product(const float *pVect1, const float *pVect2, std::size_t qty) {
    return dot_product_sse2(pVect1, pVect2, qty);
}

#endif


#if defined(HNSW_HAVE_SSE2)

/**
 * Non-template overload of dot_product for double arrays; forwards to the
 * double overload of dot_product_sse2.
 *
 * Declared `inline` because this is a non-template function defined in a
 * header: without it, including the header from more than one translation
 * unit produces multiple definitions at link time.
 *
 * @param pVect1  first input array, at least `qty` elements
 * @param pVect2  second input array, at least `qty` elements
 * @param qty     number of element pairs to process
 * @return whatever dot_product_sse2 returns for the same arguments
 */
inline double dot_product(const double *pVect1, const double *pVect2, std::size_t qty) {
    return dot_product_sse2(pVect1, pVect2, qty);
}

#endif


}}


#ifdef HNSW_HAVE_AVX
#undef HNSW_HAVE_AVX
#endif

#ifdef HNSW_HAVE_SSE2
#undef HNSW_HAVE_SSE2
#endif
80 changes: 80 additions & 0 deletions include/hnsw/detail/dot_product_avx.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/**
* This file contains code from the Non-metric Space Library
*
* Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov (http://boytsov.info).
* With contributions from Lawrence Cayton (http://lcayton.com/) and others.
*
* For the complete list of contributors and further details see:
* https://github.com/searchivarius/NonMetricSpaceLib
*
* Copyright (c) 2014
*
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
*/

#pragma once

#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))

#include <x86intrin.h>

#ifdef __AVX__

#define HNSW_HAVE_AVX

namespace hnsw { namespace detail {


float dot_product_avx(const float *pVect1, const float *pVect2, std::size_t qty) {
static_assert(sizeof(float) == 4, "Cannot use SIMD instructions with non-32-bit floats.");

std::size_t qty16 = qty / 16;
std::size_t qty4 = qty / 4;

const float* pEnd1 = pVect1 + 16 * qty16;
const float* pEnd2 = pVect1 + 4 * qty4;
const float* pEnd3 = pVect1 + qty;

__m256 sum256 = _mm256_set1_ps(0);

while (pVect1 < pEnd1) {
//_mm_prefetch((char*)(pVect2 + 16), _MM_HINT_T0);

__m256 v1 = _mm256_loadu_ps(pVect1); pVect1 += 8;
__m256 v2 = _mm256_loadu_ps(pVect2); pVect2 += 8;
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));

v1 = _mm256_loadu_ps(pVect1); pVect1 += 8;
v2 = _mm256_loadu_ps(pVect2); pVect2 += 8;
sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v1, v2));
}

__m128 v1, v2;
__m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0),
_mm256_extractf128_ps(sum256, 1));

while (pVect1 < pEnd2) {
v1 = _mm_loadu_ps(pVect1); pVect1 += 4;
v2 = _mm_loadu_ps(pVect2); pVect2 += 4;
sum_prod = _mm_add_ps(sum_prod, _mm_mul_ps(v1, v2));
}

float __attribute__((aligned(32))) TmpRes[4];
_mm_store_ps(TmpRes, sum_prod);
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];

while (pVect1 < pEnd3) {
sum += (*pVect1) * (*pVect2);
++pVect1; ++pVect2;
}

return sum;
}


}} // namespace hnsw::detail

#endif // __AVX__
#endif
135 changes: 135 additions & 0 deletions include/hnsw/detail/dot_product_sse2.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/**
* This file contains code from the Non-metric Space Library
*
* Authors: Bilegsaikhan Naidan (https://github.com/bileg), Leonid Boytsov (http://boytsov.info).
* With contributions from Lawrence Cayton (http://lcayton.com/) and others.
*
* For the complete list of contributors and further details see:
* https://github.com/searchivarius/NonMetricSpaceLib
*
* Copyright (c) 2014
*
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
*/

#pragma once

#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))

#include <x86intrin.h>

#ifdef __SSE2__

#define HNSW_HAVE_SSE2

namespace hnsw { namespace detail {


/**
 * Dot product of two float arrays using SSE intrinsics.
 *
 * The input is consumed in three phases, all feeding one 4-lane accumulator:
 *   1. blocks of 16 floats, as four 4-lane multiply-adds per iteration;
 *   2. remaining blocks of 4 floats;
 *   3. the last (qty % 4) elements, one at a time, added to the horizontal
 *      sum of the accumulator.
 * All loads are unaligned loads, so the inputs need no particular alignment.
 *
 * Declared `inline` because this is a non-template function defined in a
 * header: without it, including the header from more than one translation
 * unit produces multiple definitions at link time.
 *
 * @param pVect1  first input array, at least `qty` elements
 * @param pVect2  second input array, at least `qty` elements
 * @param qty     number of element pairs to process
 * @return the accumulated sum of products; 0 when `qty` is 0
 */
inline float dot_product_sse2(const float *pVect1, const float *pVect2, std::size_t qty) {
    static_assert(sizeof(float) == 4, "Cannot use SIMD instructions with non-32-bit floats.");

    // Phase boundaries, all expressed relative to the first array.
    const float *const end16 = pVect1 + 16 * (qty / 16);
    const float *const end4 = pVect1 + 4 * (qty / 4);
    const float *const end = pVect1 + qty;

    __m128 sum = _mm_setzero_ps();

    // One 4-lane multiply-accumulate; advances both cursors by 4 floats.
    const auto step = [&pVect1, &pVect2, &sum]() {
        const __m128 v1 = _mm_loadu_ps(pVect1); pVect1 += 4;
        const __m128 v2 = _mm_loadu_ps(pVect2); pVect2 += 4;
        sum = _mm_add_ps(sum, _mm_mul_ps(v1, v2));
    };

    // Phase 1: 16 floats per iteration (manually unrolled four times).
    while (pVect1 < end16) {
        step();
        step();
        step();
        step();
    }

    // Phase 2: 4 floats per iteration.
    while (pVect1 < end4) {
        step();
    }

    // _mm_store_ps requires a 16-byte aligned destination.
    alignas(16) float lanes[4];
    _mm_store_ps(lanes, sum);
    float res = lanes[0] + lanes[1] + lanes[2] + lanes[3];

    // Phase 3: scalar tail.
    while (pVect1 < end) {
        res += (*pVect1) * (*pVect2);
        ++pVect1; ++pVect2;
    }

    return res;
}


/**
 * Dot product of two double arrays using SSE2 intrinsics.
 *
 * The input is consumed in three phases, all feeding one 2-lane accumulator:
 *   1. blocks of 8 doubles, as four 2-lane multiply-adds per iteration;
 *   2. remaining blocks of 2 doubles;
 *   3. the last (qty % 2) element, added to the horizontal sum of the
 *      accumulator.
 * All loads are unaligned loads, so the inputs need no particular alignment.
 *
 * Declared `inline` because this is a non-template function defined in a
 * header: without it, including the header from more than one translation
 * unit produces multiple definitions at link time.
 *
 * @param pVect1  first input array, at least `qty` elements
 * @param pVect2  second input array, at least `qty` elements
 * @param qty     number of element pairs to process
 * @return the accumulated sum of products; 0 when `qty` is 0
 */
inline double dot_product_sse2(const double *pVect1, const double *pVect2, std::size_t qty) {
    static_assert(sizeof(double) == 8, "Cannot use SIMD instructions with non-64-bit doubles.");

    // Phase boundaries, all expressed relative to the first array.
    const double *const end8 = pVect1 + 8 * (qty / 8);
    const double *const end2 = pVect1 + 2 * (qty / 2);
    const double *const end = pVect1 + qty;

    __m128d sum = _mm_setzero_pd();

    // One 2-lane multiply-accumulate; advances both cursors by 2 doubles.
    const auto step = [&pVect1, &pVect2, &sum]() {
        const __m128d v1 = _mm_loadu_pd(pVect1); pVect1 += 2;
        const __m128d v2 = _mm_loadu_pd(pVect2); pVect2 += 2;
        sum = _mm_add_pd(sum, _mm_mul_pd(v1, v2));
    };

    // Phase 1: 8 doubles per iteration (manually unrolled four times).
    while (pVect1 < end8) {
        step();
        step();
        step();
        step();
    }

    // Phase 2: 2 doubles per iteration.
    while (pVect1 < end2) {
        step();
    }

    // _mm_store_pd requires a 16-byte aligned destination.
    alignas(16) double lanes[2];
    _mm_store_pd(lanes, sum);
    double res = lanes[0] + lanes[1];

    // Phase 3: scalar tail.
    while (pVect1 < end) {
        res += (*pVect1) * (*pVect2);
        ++pVect1; ++pVect2;
    }

    return res;
}


}} // namespace hnsw::detail

#endif // __SSE2__
#endif
Loading

0 comments on commit d76eaa2

Please sign in to comment.