Skip to content

Commit

Permalink
implemented clippy fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
bwsw committed Jun 5, 2024
1 parent b454f31 commit 1ad4ac2
Show file tree
Hide file tree
Showing 11 changed files with 361 additions and 359 deletions.
4 changes: 2 additions & 2 deletions examples/random.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn main() {
data.push(column);
}
// give an id to each data
let data_with_id = data.iter().zip(0..data.len()).collect();
let data_with_id = data.iter().zip(0..data.len()).collect::<Vec<_>>();

let ef_c = 200;
let max_nb_connection = 15;
Expand All @@ -50,7 +50,7 @@ fn main() {
start = ProcessTime::now();
begin_t = SystemTime::now();
for _i in 0..data_with_id.len() {
hns.insert(data_with_id[_i]);
hns.insert((data_with_id[_i].0.as_slice(), data_with_id[_i].1))
}
cpu_time = start.elapsed();
println!("\n\n serial hnsw data insertion {:?}", cpu_time);
Expand Down
38 changes: 19 additions & 19 deletions src/api.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Api for external language.
//! This file provides a trait to be used as an opaque pointer for C or Julia calls used in file libext.rs
use std::path::PathBuf;
use std::path::Path;

use serde::{de::DeserializeOwned, Serialize};

Expand All @@ -12,16 +12,16 @@ use anndists::dist::distances::Distance;
pub trait AnnT {
/// type of data vectors
type Val;
///
fn insert_data(&mut self, data: &Vec<Self::Val>, id: usize);
///
fn search_neighbours(&self, data: &Vec<Self::Val>, knbn: usize, ef_s: usize) -> Vec<Neighbour>;
///
fn parallel_insert_data(&mut self, data: &Vec<(&Vec<Self::Val>, usize)>);
///
//
fn insert_data(&mut self, data: &[Self::Val], id: usize);
//
fn search_neighbours(&self, data: &[Self::Val], knbn: usize, ef_s: usize) -> Vec<Neighbour>;
//
fn parallel_insert_data(&mut self, data: &[(&Vec<Self::Val>, usize)]);
//
fn parallel_search_neighbours(
&self,
data: &Vec<Vec<Self::Val>>,
data: &[Vec<Self::Val>],
knbn: usize,
ef_s: usize,
) -> Vec<Vec<Neighbour>>;
Expand All @@ -32,7 +32,7 @@ pub trait AnnT {
/// **We do not overwrite old files if they are currently in use by memory map**
/// If these files already exist, they are not overwritten; instead a unique filename is generated by appending a random number to the filename.
/// The function returns the basename used for the dump
fn file_dump(&self, path: &PathBuf, file_basename: &str) -> anyhow::Result<String>;
fn file_dump(&self, path: &Path, file_basename: &str) -> anyhow::Result<String>;
}

impl<'b, T, D> AnnT for Hnsw<'b, T, D>
Expand All @@ -41,21 +41,21 @@ where
D: Distance<T> + Send + Sync,
{
type Val = T;
///
fn insert_data(&mut self, data: &Vec<Self::Val>, id: usize) {
//
fn insert_data(&mut self, data: &[Self::Val], id: usize) {
self.insert((data, id));
}
///
fn search_neighbours(&self, data: &Vec<T>, knbn: usize, ef_s: usize) -> Vec<Neighbour> {
//
fn search_neighbours(&self, data: &[T], knbn: usize, ef_s: usize) -> Vec<Neighbour> {
self.search(data, knbn, ef_s)
}
fn parallel_insert_data(&mut self, data: &Vec<(&Vec<Self::Val>, usize)>) {
fn parallel_insert_data(&mut self, data: &[(&Vec<Self::Val>, usize)]) {
self.parallel_insert(data);
}

fn parallel_search_neighbours(
&self,
data: &Vec<Vec<Self::Val>>,
data: &[Vec<Self::Val>],
knbn: usize,
ef_s: usize,
) -> Vec<Vec<Neighbour>> {
Expand All @@ -68,7 +68,7 @@ where
///
///
fn file_dump(&self, path: &PathBuf, file_basename: &str) -> anyhow::Result<String> {
fn file_dump(&self, path: &Path, file_basename: &str) -> anyhow::Result<String> {
log::info!("in Hnsw::file_dump");
//
// do not overwrite if mmap is active
Expand All @@ -81,9 +81,9 @@ where
dumpinit.flush()?;
log::info!("end of dump");
if res.is_ok() {
return Ok(dumpname);
Ok(dumpname)
} else {
return Err(anyhow::anyhow!("unexpected error"));
Err(anyhow::anyhow!("unexpected error"))
}
} // end of dump
} // end of impl block AnnT for Hnsw<T,D>
54 changes: 26 additions & 28 deletions src/datamap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use std::io::BufReader;

use std::fs::{File, OpenOptions};
use std::path::PathBuf;
use std::path::{Path, PathBuf};

use indexmap::map::IndexMap;
use log::log_enabled;
Expand All @@ -32,7 +32,7 @@ pub struct DataMap {
t_name: String,
/// dimension of data vector
dimension: usize,
///
//
distname: String,
} // end of DataMap

Expand All @@ -41,11 +41,14 @@ impl DataMap {
/// The fname argument corresponds to the basename of the dump.
/// To reload from file fname.hnsw.data just pass fname as argument.
/// The dir argument is the directory where the fname.hnsw.data and fname.hnsw.graph reside.
pub fn from_hnswdump<T: std::fmt::Debug>(dir: &str, fname: &String) -> Result<DataMap, String> {
pub fn from_hnswdump<T: std::fmt::Debug>(
dir: &Path,
file_name: &str,
) -> Result<DataMap, String> {
// reload description to have data type, and check for dump version
let mut graphpath = PathBuf::new();
let mut graphpath = PathBuf::from(dir);
graphpath.push(dir);
let mut filename = fname.clone();
let mut filename = file_name.to_string();
filename.push_str(".hnsw.graph");
graphpath.push(filename);
let graphfileres = OpenOptions::new().read(true).open(&graphpath);
Expand Down Expand Up @@ -82,7 +85,7 @@ impl DataMap {
//
let mut datapath = PathBuf::new();
datapath.push(dir);
let mut filename = fname.clone();
let mut filename = file_name.to_string();
filename.push_str(".hnsw.data");
datapath.push(filename);
//
Expand Down Expand Up @@ -132,8 +135,8 @@ impl DataMap {
&mapped_slice[current_mmap_addr..current_mmap_addr + std::mem::size_of::<usize>()],
);
current_mmap_addr += std::mem::size_of::<usize>();
let dimension = usize::from_ne_bytes(usize_slice) as usize;
if dimension as usize != descr_dimension {
let dimension = usize::from_ne_bytes(usize_slice);
if dimension != descr_dimension {
log::error!("description and data do not agree on dimension, data got : {:?}, description got : {:?}",dimension, descr_dimension);
return Err(String::from(
"description and data do not agree on dimension",
Expand Down Expand Up @@ -195,16 +198,13 @@ impl DataMap {
if log_enabled!(log::Level::Debug) && i == 0 {
log::debug!("serialized bytes len to reload {:?}", serialized_len);
}
let mut v_serialized = Vec::<u8>::with_capacity(serialized_len);
// TODO avoid initialization
v_serialized.resize(serialized_len as usize, 0);
let mut v_serialized = vec![0; serialized_len];
v_serialized.copy_from_slice(
&mapped_slice[current_mmap_addr..current_mmap_addr + serialized_len],
);
current_mmap_addr += serialized_len;
let slice_t = unsafe {
std::slice::from_raw_parts(v_serialized.as_ptr() as *const T, dimension as usize)
};
let slice_t =
unsafe { std::slice::from_raw_parts(v_serialized.as_ptr() as *const T, dimension) };
log::trace!(
"deserialized v : {:?} address : {:?} ",
slice_t,
Expand All @@ -223,7 +223,7 @@ impl DataMap {
distname,
};
//
return Ok(datamap);
Ok(datamap)
} // end of from_datas

//
Expand Down Expand Up @@ -252,14 +252,14 @@ impl DataMap {
let datat_name_arg_last = datat_name_vec.last().unwrap();
//
if datat_name_arg_last == tname_last {
return true;
true
} else {
log::info!(
"data type in DataMap : {}, type arg = {}",
tname_last,
datat_name_arg_last
);
return false;
false
}
} // end of check_data_type

Expand All @@ -272,12 +272,9 @@ impl DataMap {
pub fn get_data<'a, T: Clone + std::fmt::Debug>(&'a self, dataid: &DataId) -> Option<&'a [T]> {
//
log::trace!("in DataMap::get_data, dataid : {:?}", dataid);
let address = self.hmap.get(dataid);
if address.is_none() {
return None;
}
log::debug!(" adress for id : {}, address : {:?}", dataid, address);
let mut current_mmap_addr = *address.unwrap();
let address = self.hmap.get(dataid)?;
log::debug!(" address for id : {}, address : {:?}", dataid, address);
let mut current_mmap_addr = *address;
let mapped_slice = self.mmap.as_slice();
let mut u64_slice = [0u8; std::mem::size_of::<u64>()];
u64_slice.copy_from_slice(
Expand All @@ -289,7 +286,7 @@ impl DataMap {
let slice_t = unsafe {
std::slice::from_raw_parts(
mapped_slice[current_mmap_addr..].as_ptr() as *const T,
self.dimension as usize,
self.dimension,
)
};
Some(slice_t)
Expand All @@ -303,12 +300,12 @@ impl DataMap {

/// returns full data type name
pub fn get_data_typename(&self) -> String {
return self.t_name.clone();
self.t_name.clone()
}

/// returns the distance name
pub fn get_distname(&self) -> String {
return self.distname.clone();
self.distname.clone()
}

/// return the number of data in mmap
Expand Down Expand Up @@ -382,7 +379,7 @@ mod tests {
}
//
// now that we have checked that datamap seems ok, test reload of hnsw with mmap
let datamap: DataMap = DataMap::from_hnswdump::<f32>(".", &fname).unwrap();
let datamap: DataMap = DataMap::from_hnswdump::<f32>(&Path::new("."), &fname).unwrap();
let nb_test = 30;
log::info!("checking random access of id , nb test : {}", nb_test);
for _ in 0..nb_test {
Expand Down Expand Up @@ -439,7 +436,8 @@ mod tests {
let directory = PathBuf::from(".");
let _res = hnsw.file_dump(&directory, &fname);
// now that we have checked that datamap seems ok, test reload of hnsw with mmap
let datamap: DataMap = DataMap::from_hnswdump::<u32>(".", &fname.to_string()).unwrap();
let datamap: DataMap =
DataMap::from_hnswdump::<u32>(&directory.as_path(), &fname.to_string()).unwrap();
// testing type check
assert!(datamap.check_data_type::<u32>());
assert!(!datamap.check_data_type::<f32>());
Expand Down
7 changes: 2 additions & 5 deletions src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@ pub trait FilterT {

impl FilterT for Vec<usize> {
fn hnsw_filter(&self, id: &DataId) -> bool {
return match &self.binary_search(id) {
Ok(_) => true,
_ => false,
};
self.binary_search(id).is_ok()
}
}

Expand All @@ -22,6 +19,6 @@ where
F: Fn(&DataId) -> bool,
{
fn hnsw_filter(&self, id: &DataId) -> bool {
return self(id);
self(id)
}
}
50 changes: 21 additions & 29 deletions src/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ use hnsw::*;

impl PartialEq for Neighbour {
fn eq(&self, other: &Neighbour) -> bool {
return self.distance == other.distance;
self.distance == other.distance
} // end eq
}

impl Eq for Neighbour {}

// order points by distance to self.
#[allow(clippy::non_canonical_partial_ord_impl)]
impl PartialOrd for Neighbour {
fn partial_cmp(&self, other: &Neighbour) -> Option<Ordering> {
self.distance.partial_cmp(&other.distance)
Expand Down Expand Up @@ -57,15 +58,15 @@ pub struct FlatPoint {
impl FlatPoint {
/// returns the neighbours ordered by distance.
pub fn get_neighbours(&self) -> &Vec<Neighbour> {
return &self.neighbours;
&self.neighbours
}
/// returns the origin id of the point
pub fn get_id(&self) -> DataId {
return self.origin_id;
self.origin_id
}
///
//
pub fn get_p_id(&self) -> PointId {
return self.p_id;
self.p_id
}
} // end impl block for FlatPoint

Expand All @@ -79,12 +80,11 @@ fn flatten_point<T: Clone + Send + Sync>(point: &Point<T>) -> FlatPoint {
}
}
flat_neighbours.sort_unstable();
let fpoint = FlatPoint {
FlatPoint {
origin_id: point.get_origin_id(),
p_id: point.get_point_id(),
neighbours: flat_neighbours,
};
fpoint
}
} // end of flatten_point

/// A structure providing neighbourhood information of a point stored in the Hnsw structure given its DataId.
Expand All @@ -98,11 +98,11 @@ impl FlatNeighborhood {
/// get neighbour of a point given its id.
/// The neighbours are sorted in increasing distance from data_id.
pub fn get_neighbours(&self, p_id: DataId) -> Option<Vec<Neighbour>> {
let res = match self.hash_t.get(&p_id) {
Some(point) => Some(point.get_neighbours().clone()),
_ => None,
};
return res;
let res = self
.hash_t
.get(&p_id)
.map(|point| point.get_neighbours().clone());
res
}
} // end impl block for FlatNeighborhood

Expand All @@ -113,24 +113,16 @@ impl<'b, T: Clone + Send + Sync, D: Distance<T> + Send + Sync> From<&Hnsw<'b, T,
/// Useful after reloading from a dump with T=NoData and D = NoDist as points are then reloaded with neighbourhood information only.
fn from(hnsw: &Hnsw<T, D>) -> Self {
let mut hash_t = HashMap::new();
let mut ptiter = hnsw.get_point_indexation().into_iter();
let pt_iter = hnsw.get_point_indexation().into_iter();
//
loop {
if let Some(point) = ptiter.next() {
// println!("point : {:?}", _point.p_id);
let res_insert = hash_t.insert(point.get_origin_id(), flatten_point(&point));
match res_insert {
Some(old_point) => {
println!("2 points with same origin id {:?}", old_point.origin_id);
log::error!("2 points with same origin id {:?}", old_point.origin_id);
}
_ => (),
} // end match
} else {
break;
for point in pt_iter {
// println!("point : {:?}", _point.p_id);
let res_insert = hash_t.insert(point.get_origin_id(), flatten_point(&point));
if let Some(old_point) = res_insert {
log::error!("2 points with same origin id {:?}", old_point.origin_id);
}
} // end while
return FlatNeighborhood { hash_t };
}
FlatNeighborhood { hash_t }
}
} // end of From implementation

Expand Down
Loading

0 comments on commit 1ad4ac2

Please sign in to comment.