Skip to content

Commit

Permalink
SMEV-1899: added methods to store / get ids in index
Browse files Browse the repository at this point in the history
  • Loading branch information
DocheA02 committed Aug 24, 2021
1 parent ffb0bfb commit 7cb93f2
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 0 deletions.
6 changes: 6 additions & 0 deletions hnswlib-jna/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@
<artifactId>jna</artifactId>
<version>${jna.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/it.unimi.dsi/fastutil -->
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
<version>8.5.4</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import com.stepstone.search.hnswlib.jna.exception.UnableToCreateNewIndexInstanceException;
import com.stepstone.search.hnswlib.jna.exception.UnexpectedNativeException;
import com.sun.jna.Pointer;
import it.unimi.dsi.fastutil.ints.IntArraySet;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.ints.IntSets;

import java.nio.file.Path;
import java.util.Optional;
Expand Down Expand Up @@ -38,6 +41,7 @@ public class Index {
// Vector space this index was created for; assigned in the constructor.
private SpaceName spaceName;
// presumably the length of the vectors stored in this index — confirm against the constructor
private int dimension;
// Set to true on the source index when synchronizedIndex(...) hands its native reference to another Index.
private boolean referenceReused;
// Ids recorded by the addItem/addNormalizedItem overloads taking saveId = true; markDeleted removes from it
// and setIds replaces it. Wrapped with IntSets.synchronize, so individual set calls are synchronized.
// NOTE(review): IntArraySet is array-backed and scans linearly on lookup; consider a hash-based IntSet
// if many ids are expected — confirm typical sizes before changing (iteration order would differ).
private IntSet ids = IntSets.synchronize(new IntArraySet());

public Index(SpaceName spaceName, int dimension) {
this.spaceName = spaceName;
Expand Down Expand Up @@ -122,6 +126,33 @@ public void addItem(float[] item, int id) {
checkResultCode(hnswlib.addItemToIndex(item, false, id, reference));
}

/**
 * Adds an item with the given ID to the index and optionally records that ID.
 * The insertion is delegated to {@link #addItem(float[], int)}, so no extra
 * normalization is applied unless the Vector Space requires it (e.g., COSINE).
 *
 * @param item   item to add
 * @param id     identifier of the item
 * @param saveId when {@code true}, the id is also stored in the internal id collection
 */
public void addItem(float[] item, int id, boolean saveId) {
    addItem(item, id);
    if (!saveId) {
        return;
    }
    // Only reached when the caller asked for the id to be tracked.
    ids.add(id);
}

/**
 * Replaces the internal id collection with the given set.
 * The set is stored by reference (no copy, no synchronization wrapper is added),
 * so later changes made through this index are visible to every holder of the set.
 *
 * @param ids set of ids to use from now on; must not be {@code null}
 * @throws NullPointerException if {@code ids} is {@code null}
 */
public void setIds(IntSet ids) {
    // Fail fast: a null set would otherwise surface later as a NullPointerException
    // inside markDeleted (after the native call) or addItem(..., true).
    if (ids == null) {
        throw new NullPointerException("ids must not be null");
    }
    this.ids = ids;
}

/**
 * Returns the ids recorded for items in this index.
 * The returned object is the internal collection itself, not a copy.
 *
 * @return set of ids
 */
public IntSet getIds() {
    return this.ids;
}

/**
* Add a normalized item without ID to the index. Internally, an incremental
* ID (starting from 0) will be given to this item.
Expand All @@ -142,6 +173,19 @@ public void addNormalizedItem(float[] item, int id) {
checkResultCode(hnswlib.addItemToIndex(item, true, id, reference));
}

/**
 * Adds a normalized item with ID to the index and optionally records that ID.
 * The insertion itself is delegated to {@link #addNormalizedItem(float[], int)}.
 *
 * @param item   float array with the length expected by the index (dimension)
 * @param id     an identifier used by the native library
 * @param saveId when {@code true}, the id is also stored in the internal id collection
 */
public void addNormalizedItem(float[] item, int id, boolean saveId) {
    addNormalizedItem(item, id);
    if (!saveId) {
        return;
    }
    // Only reached when the caller asked for the id to be tracked.
    ids.add(id);
}

/**
* Return the number of elements already inserted in
* the index.
Expand Down Expand Up @@ -328,6 +372,9 @@ public int getEfConstruction(){
*/
public void markDeleted(int id){
    checkResultCode(hnswlib.markDeleted(reference, id));
    // remove(int) leaves the set untouched when the id is absent, so a preceding
    // contains() check is unnecessary; a single call also avoids a check-then-act
    // gap between two separately synchronized operations on the set.
    ids.remove(id);
}

private void checkIndexIsInitialized() {
Expand Down Expand Up @@ -372,6 +419,7 @@ public static Index synchronizedIndex(Index index) {
concurrentIndex.reference = index.reference;
concurrentIndex.cleared = index.cleared;
concurrentIndex.initialized = index.initialized;
concurrentIndex.setIds(index.getIds());
index.referenceReused = true;
return concurrentIndex;
}
Expand Down
Binary file modified hnswlib-jna/src/main/resources/libhnswlib-jna-x86-64.dll
Binary file not shown.
Binary file modified hnswlib-jna/src/main/resources/libhnswlib-jna-x86-64.exp
Binary file not shown.
Binary file modified hnswlib-jna/src/main/resources/libhnswlib-jna-x86-64.libw
Binary file not shown.

0 comments on commit 7cb93f2

Please sign in to comment.