Skip to content

Commit 9bb1d99

Browse files
authored
Merge pull request #1 from bwsw/clippy-fixes
Implemented clippy fixes
2 parents b454f31 + 1ad4ac2 commit 9bb1d99

File tree

11 files changed

+361
-359
lines changed

11 files changed

+361
-359
lines changed

Diff for: examples/random.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ fn main() {
2323
data.push(column);
2424
}
2525
// give an id to each data
26-
let data_with_id = data.iter().zip(0..data.len()).collect();
26+
let data_with_id = data.iter().zip(0..data.len()).collect::<Vec<_>>();
2727

2828
let ef_c = 200;
2929
let max_nb_connection = 15;
@@ -50,7 +50,7 @@ fn main() {
5050
start = ProcessTime::now();
5151
begin_t = SystemTime::now();
5252
for _i in 0..data_with_id.len() {
53-
hns.insert(data_with_id[_i]);
53+
hns.insert((data_with_id[_i].0.as_slice(), data_with_id[_i].1))
5454
}
5555
cpu_time = start.elapsed();
5656
println!("\n\n serial hnsw data insertion {:?}", cpu_time);

Diff for: src/api.rs

+19-19
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Api for external language.
22
//! This file provides a trait to be used as an opaque pointer for C or Julia calls used in file libext.rs
33
4-
use std::path::PathBuf;
4+
use std::path::Path;
55

66
use serde::{de::DeserializeOwned, Serialize};
77

@@ -12,16 +12,16 @@ use anndists::dist::distances::Distance;
1212
pub trait AnnT {
1313
/// type of data vectors
1414
type Val;
15-
///
16-
fn insert_data(&mut self, data: &Vec<Self::Val>, id: usize);
17-
///
18-
fn search_neighbours(&self, data: &Vec<Self::Val>, knbn: usize, ef_s: usize) -> Vec<Neighbour>;
19-
///
20-
fn parallel_insert_data(&mut self, data: &Vec<(&Vec<Self::Val>, usize)>);
21-
///
15+
//
16+
fn insert_data(&mut self, data: &[Self::Val], id: usize);
17+
//
18+
fn search_neighbours(&self, data: &[Self::Val], knbn: usize, ef_s: usize) -> Vec<Neighbour>;
19+
//
20+
fn parallel_insert_data(&mut self, data: &[(&Vec<Self::Val>, usize)]);
21+
//
2222
fn parallel_search_neighbours(
2323
&self,
24-
data: &Vec<Vec<Self::Val>>,
24+
data: &[Vec<Self::Val>],
2525
knbn: usize,
2626
ef_s: usize,
2727
) -> Vec<Vec<Neighbour>>;
@@ -32,7 +32,7 @@ pub trait AnnT {
3232
/// **We do not overwrite old files if they are currently in use by memory map**
3333
/// If these files already exist , they are not overwritten and a unique filename is generated by concatenating a random number to filename.
3434
/// The function returns the basename used for the dump
35-
fn file_dump(&self, path: &PathBuf, file_basename: &str) -> anyhow::Result<String>;
35+
fn file_dump(&self, path: &Path, file_basename: &str) -> anyhow::Result<String>;
3636
}
3737

3838
impl<'b, T, D> AnnT for Hnsw<'b, T, D>
@@ -41,21 +41,21 @@ where
4141
D: Distance<T> + Send + Sync,
4242
{
4343
type Val = T;
44-
///
45-
fn insert_data(&mut self, data: &Vec<Self::Val>, id: usize) {
44+
//
45+
fn insert_data(&mut self, data: &[Self::Val], id: usize) {
4646
self.insert((data, id));
4747
}
48-
///
49-
fn search_neighbours(&self, data: &Vec<T>, knbn: usize, ef_s: usize) -> Vec<Neighbour> {
48+
//
49+
fn search_neighbours(&self, data: &[T], knbn: usize, ef_s: usize) -> Vec<Neighbour> {
5050
self.search(data, knbn, ef_s)
5151
}
52-
fn parallel_insert_data(&mut self, data: &Vec<(&Vec<Self::Val>, usize)>) {
52+
fn parallel_insert_data(&mut self, data: &[(&Vec<Self::Val>, usize)]) {
5353
self.parallel_insert(data);
5454
}
5555

5656
fn parallel_search_neighbours(
5757
&self,
58-
data: &Vec<Vec<Self::Val>>,
58+
data: &[Vec<Self::Val>],
5959
knbn: usize,
6060
ef_s: usize,
6161
) -> Vec<Vec<Neighbour>> {
@@ -68,7 +68,7 @@ where
6868
///
6969
7070
///
71-
fn file_dump(&self, path: &PathBuf, file_basename: &str) -> anyhow::Result<String> {
71+
fn file_dump(&self, path: &Path, file_basename: &str) -> anyhow::Result<String> {
7272
log::info!("in Hnsw::file_dump");
7373
//
7474
// do not overwrite if mmap is active
@@ -81,9 +81,9 @@ where
8181
dumpinit.flush()?;
8282
log::info!("end of dump");
8383
if res.is_ok() {
84-
return Ok(dumpname);
84+
Ok(dumpname)
8585
} else {
86-
return Err(anyhow::anyhow!("unexpected error"));
86+
Err(anyhow::anyhow!("unexpected error"))
8787
}
8888
} // end of dump
8989
} // end of impl block AnnT for Hnsw<T,D>

Diff for: src/datamap.rs

+26-28
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
use std::io::BufReader;
99

1010
use std::fs::{File, OpenOptions};
11-
use std::path::PathBuf;
11+
use std::path::{Path, PathBuf};
1212

1313
use indexmap::map::IndexMap;
1414
use log::log_enabled;
@@ -32,7 +32,7 @@ pub struct DataMap {
3232
t_name: String,
3333
/// dimension of data vector
3434
dimension: usize,
35-
///
35+
//
3636
distname: String,
3737
} // end of DataMap
3838

@@ -41,11 +41,14 @@ impl DataMap {
4141
/// The fname argument corresponds to the basename of the dump.
4242
/// To reload from file fname.hnsw.data just pass fname as argument.
4343
/// The dir argument is the directory where the fname.hnsw.data and fname.hnsw.graph reside.
44-
pub fn from_hnswdump<T: std::fmt::Debug>(dir: &str, fname: &String) -> Result<DataMap, String> {
44+
pub fn from_hnswdump<T: std::fmt::Debug>(
45+
dir: &Path,
46+
file_name: &str,
47+
) -> Result<DataMap, String> {
4548
// reload description to have data type, and check for dump version
46-
let mut graphpath = PathBuf::new();
49+
let mut graphpath = PathBuf::from(dir);
4750
graphpath.push(dir);
48-
let mut filename = fname.clone();
51+
let mut filename = file_name.to_string();
4952
filename.push_str(".hnsw.graph");
5053
graphpath.push(filename);
5154
let graphfileres = OpenOptions::new().read(true).open(&graphpath);
@@ -82,7 +85,7 @@ impl DataMap {
8285
//
8386
let mut datapath = PathBuf::new();
8487
datapath.push(dir);
85-
let mut filename = fname.clone();
88+
let mut filename = file_name.to_string();
8689
filename.push_str(".hnsw.data");
8790
datapath.push(filename);
8891
//
@@ -132,8 +135,8 @@ impl DataMap {
132135
&mapped_slice[current_mmap_addr..current_mmap_addr + std::mem::size_of::<usize>()],
133136
);
134137
current_mmap_addr += std::mem::size_of::<usize>();
135-
let dimension = usize::from_ne_bytes(usize_slice) as usize;
136-
if dimension as usize != descr_dimension {
138+
let dimension = usize::from_ne_bytes(usize_slice);
139+
if dimension != descr_dimension {
137140
log::error!("description and data do not agree on dimension, data got : {:?}, description got : {:?}",dimension, descr_dimension);
138141
return Err(String::from(
139142
"description and data do not agree on dimension",
@@ -195,16 +198,13 @@ impl DataMap {
195198
if log_enabled!(log::Level::Debug) && i == 0 {
196199
log::debug!("serialized bytes len to reload {:?}", serialized_len);
197200
}
198-
let mut v_serialized = Vec::<u8>::with_capacity(serialized_len);
199-
// TODO avoid initialization
200-
v_serialized.resize(serialized_len as usize, 0);
201+
let mut v_serialized = vec![0; serialized_len];
201202
v_serialized.copy_from_slice(
202203
&mapped_slice[current_mmap_addr..current_mmap_addr + serialized_len],
203204
);
204205
current_mmap_addr += serialized_len;
205-
let slice_t = unsafe {
206-
std::slice::from_raw_parts(v_serialized.as_ptr() as *const T, dimension as usize)
207-
};
206+
let slice_t =
207+
unsafe { std::slice::from_raw_parts(v_serialized.as_ptr() as *const T, dimension) };
208208
log::trace!(
209209
"deserialized v : {:?} address : {:?} ",
210210
slice_t,
@@ -223,7 +223,7 @@ impl DataMap {
223223
distname,
224224
};
225225
//
226-
return Ok(datamap);
226+
Ok(datamap)
227227
} // end of from_datas
228228

229229
//
@@ -252,14 +252,14 @@ impl DataMap {
252252
let datat_name_arg_last = datat_name_vec.last().unwrap();
253253
//
254254
if datat_name_arg_last == tname_last {
255-
return true;
255+
true
256256
} else {
257257
log::info!(
258258
"data type in DataMap : {}, type arg = {}",
259259
tname_last,
260260
datat_name_arg_last
261261
);
262-
return false;
262+
false
263263
}
264264
} // end of check_data_type
265265

@@ -272,12 +272,9 @@ impl DataMap {
272272
pub fn get_data<'a, T: Clone + std::fmt::Debug>(&'a self, dataid: &DataId) -> Option<&'a [T]> {
273273
//
274274
log::trace!("in DataMap::get_data, dataid : {:?}", dataid);
275-
let address = self.hmap.get(dataid);
276-
if address.is_none() {
277-
return None;
278-
}
279-
log::debug!(" adress for id : {}, address : {:?}", dataid, address);
280-
let mut current_mmap_addr = *address.unwrap();
275+
let address = self.hmap.get(dataid)?;
276+
log::debug!(" address for id : {}, address : {:?}", dataid, address);
277+
let mut current_mmap_addr = *address;
281278
let mapped_slice = self.mmap.as_slice();
282279
let mut u64_slice = [0u8; std::mem::size_of::<u64>()];
283280
u64_slice.copy_from_slice(
@@ -289,7 +286,7 @@ impl DataMap {
289286
let slice_t = unsafe {
290287
std::slice::from_raw_parts(
291288
mapped_slice[current_mmap_addr..].as_ptr() as *const T,
292-
self.dimension as usize,
289+
self.dimension,
293290
)
294291
};
295292
Some(slice_t)
@@ -303,12 +300,12 @@ impl DataMap {
303300

304301
/// returns full data type name
305302
pub fn get_data_typename(&self) -> String {
306-
return self.t_name.clone();
303+
self.t_name.clone()
307304
}
308305

309306
/// returns full data type name
310307
pub fn get_distname(&self) -> String {
311-
return self.distname.clone();
308+
self.distname.clone()
312309
}
313310

314311
/// return the number of data in mmap
@@ -382,7 +379,7 @@ mod tests {
382379
}
383380
//
384381
// now we have check that datamap seems ok, test reload of hnsw with mmap
385-
let datamap: DataMap = DataMap::from_hnswdump::<f32>(".", &fname).unwrap();
382+
let datamap: DataMap = DataMap::from_hnswdump::<f32>(&Path::new("."), &fname).unwrap();
386383
let nb_test = 30;
387384
log::info!("checking random access of id , nb test : {}", nb_test);
388385
for _ in 0..nb_test {
@@ -439,7 +436,8 @@ mod tests {
439436
let directory = PathBuf::from(".");
440437
let _res = hnsw.file_dump(&directory, &fname);
441438
// now we have check that datamap seems ok, test reload of hnsw with mmap
442-
let datamap: DataMap = DataMap::from_hnswdump::<u32>(".", &fname.to_string()).unwrap();
439+
let datamap: DataMap =
440+
DataMap::from_hnswdump::<u32>(&directory.as_path(), &fname.to_string()).unwrap();
443441
// testing type check
444442
assert!(datamap.check_data_type::<u32>());
445443
assert!(!datamap.check_data_type::<f32>());

Diff for: src/filter.rs

+2-5
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,7 @@ pub trait FilterT {
1010

1111
impl FilterT for Vec<usize> {
1212
fn hnsw_filter(&self, id: &DataId) -> bool {
13-
return match &self.binary_search(id) {
14-
Ok(_) => true,
15-
_ => false,
16-
};
13+
self.binary_search(id).is_ok()
1714
}
1815
}
1916

@@ -22,6 +19,6 @@ where
2219
F: Fn(&DataId) -> bool,
2320
{
2421
fn hnsw_filter(&self, id: &DataId) -> bool {
25-
return self(id);
22+
self(id)
2623
}
2724
}

Diff for: src/flatten.rs

+21-29
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@ use hnsw::*;
1717

1818
impl PartialEq for Neighbour {
1919
fn eq(&self, other: &Neighbour) -> bool {
20-
return self.distance == other.distance;
20+
self.distance == other.distance
2121
} // end eq
2222
}
2323

2424
impl Eq for Neighbour {}
2525

2626
// order points by distance to self.
27+
#[allow(clippy::non_canonical_partial_ord_impl)]
2728
impl PartialOrd for Neighbour {
2829
fn partial_cmp(&self, other: &Neighbour) -> Option<Ordering> {
2930
self.distance.partial_cmp(&other.distance)
@@ -57,15 +58,15 @@ pub struct FlatPoint {
5758
impl FlatPoint {
5859
/// returns the neighbours orderded by distance.
5960
pub fn get_neighbours(&self) -> &Vec<Neighbour> {
60-
return &self.neighbours;
61+
&self.neighbours
6162
}
6263
/// returns the origin id of the point
6364
pub fn get_id(&self) -> DataId {
64-
return self.origin_id;
65+
self.origin_id
6566
}
66-
///
67+
//
6768
pub fn get_p_id(&self) -> PointId {
68-
return self.p_id;
69+
self.p_id
6970
}
7071
} // end impl block for FlatPoint
7172

@@ -79,12 +80,11 @@ fn flatten_point<T: Clone + Send + Sync>(point: &Point<T>) -> FlatPoint {
7980
}
8081
}
8182
flat_neighbours.sort_unstable();
82-
let fpoint = FlatPoint {
83+
FlatPoint {
8384
origin_id: point.get_origin_id(),
8485
p_id: point.get_point_id(),
8586
neighbours: flat_neighbours,
86-
};
87-
fpoint
87+
}
8888
} // end of flatten_point
8989

9090
/// A structure providing neighbourhood information of a point stored in the Hnsw structure given its DataId.
@@ -98,11 +98,11 @@ impl FlatNeighborhood {
9898
/// get neighbour of a point given its id.
9999
/// The neighbours are sorted in increasing distance from data_id.
100100
pub fn get_neighbours(&self, p_id: DataId) -> Option<Vec<Neighbour>> {
101-
let res = match self.hash_t.get(&p_id) {
102-
Some(point) => Some(point.get_neighbours().clone()),
103-
_ => None,
104-
};
105-
return res;
101+
let res = self
102+
.hash_t
103+
.get(&p_id)
104+
.map(|point| point.get_neighbours().clone());
105+
res
106106
}
107107
} // end impl block for FlatNeighborhood
108108

@@ -113,24 +113,16 @@ impl<'b, T: Clone + Send + Sync, D: Distance<T> + Send + Sync> From<&Hnsw<'b, T,
113113
/// Useful after reloading from a dump with T=NoData and D = NoDist as points are then reloaded with neighbourhood information only.
114114
fn from(hnsw: &Hnsw<T, D>) -> Self {
115115
let mut hash_t = HashMap::new();
116-
let mut ptiter = hnsw.get_point_indexation().into_iter();
116+
let pt_iter = hnsw.get_point_indexation().into_iter();
117117
//
118-
loop {
119-
if let Some(point) = ptiter.next() {
120-
// println!("point : {:?}", _point.p_id);
121-
let res_insert = hash_t.insert(point.get_origin_id(), flatten_point(&point));
122-
match res_insert {
123-
Some(old_point) => {
124-
println!("2 points with same origin id {:?}", old_point.origin_id);
125-
log::error!("2 points with same origin id {:?}", old_point.origin_id);
126-
}
127-
_ => (),
128-
} // end match
129-
} else {
130-
break;
118+
for point in pt_iter {
119+
// println!("point : {:?}", _point.p_id);
120+
let res_insert = hash_t.insert(point.get_origin_id(), flatten_point(&point));
121+
if let Some(old_point) = res_insert {
122+
log::error!("2 points with same origin id {:?}", old_point.origin_id);
131123
}
132-
} // end while
133-
return FlatNeighborhood { hash_t };
124+
}
125+
FlatNeighborhood { hash_t }
134126
}
135127
} // e,d of Fom implementation
136128

0 commit comments

Comments
 (0)