8
8
use std:: io:: BufReader ;
9
9
10
10
use std:: fs:: { File , OpenOptions } ;
11
- use std:: path:: PathBuf ;
11
+ use std:: path:: { Path , PathBuf } ;
12
12
13
13
use indexmap:: map:: IndexMap ;
14
14
use log:: log_enabled;
@@ -32,7 +32,7 @@ pub struct DataMap {
32
32
t_name : String ,
33
33
/// dimension of data vector
34
34
dimension : usize ,
35
- ///
35
+ //
36
36
distname : String ,
37
37
} // end of DataMap
38
38
@@ -41,11 +41,14 @@ impl DataMap {
41
41
/// The file_name argument corresponds to the basename of the dump.
42
42
/// To reload from file fname.hnsw.data just pass fname as the file_name argument.
43
43
/// The dir argument is the directory where the fname.hnsw.data and fname.hnsw.graph reside.
44
- pub fn from_hnswdump < T : std:: fmt:: Debug > ( dir : & str , fname : & String ) -> Result < DataMap , String > {
44
+ pub fn from_hnswdump < T : std:: fmt:: Debug > (
45
+ dir : & Path ,
46
+ file_name : & str ,
47
+ ) -> Result < DataMap , String > {
45
48
// reload description to have data type, and check for dump version
46
- let mut graphpath = PathBuf :: new ( ) ;
49
+ let mut graphpath = PathBuf :: from ( dir ) ;
47
50
graphpath. push ( dir) ;
48
- let mut filename = fname . clone ( ) ;
51
+ let mut filename = file_name . to_string ( ) ;
49
52
filename. push_str ( ".hnsw.graph" ) ;
50
53
graphpath. push ( filename) ;
51
54
let graphfileres = OpenOptions :: new ( ) . read ( true ) . open ( & graphpath) ;
@@ -82,7 +85,7 @@ impl DataMap {
82
85
//
83
86
let mut datapath = PathBuf :: new ( ) ;
84
87
datapath. push ( dir) ;
85
- let mut filename = fname . clone ( ) ;
88
+ let mut filename = file_name . to_string ( ) ;
86
89
filename. push_str ( ".hnsw.data" ) ;
87
90
datapath. push ( filename) ;
88
91
//
@@ -132,8 +135,8 @@ impl DataMap {
132
135
& mapped_slice[ current_mmap_addr..current_mmap_addr + std:: mem:: size_of :: < usize > ( ) ] ,
133
136
) ;
134
137
current_mmap_addr += std:: mem:: size_of :: < usize > ( ) ;
135
- let dimension = usize:: from_ne_bytes ( usize_slice) as usize ;
136
- if dimension as usize != descr_dimension {
138
+ let dimension = usize:: from_ne_bytes ( usize_slice) ;
139
+ if dimension != descr_dimension {
137
140
log:: error!( "description and data do not agree on dimension, data got : {:?}, description got : {:?}" , dimension, descr_dimension) ;
138
141
return Err ( String :: from (
139
142
"description and data do not agree on dimension" ,
@@ -195,16 +198,13 @@ impl DataMap {
195
198
if log_enabled ! ( log:: Level :: Debug ) && i == 0 {
196
199
log:: debug!( "serialized bytes len to reload {:?}" , serialized_len) ;
197
200
}
198
- let mut v_serialized = Vec :: < u8 > :: with_capacity ( serialized_len) ;
199
- // TODO avoid initialization
200
- v_serialized. resize ( serialized_len as usize , 0 ) ;
201
+ let mut v_serialized = vec ! [ 0 ; serialized_len] ;
201
202
v_serialized. copy_from_slice (
202
203
& mapped_slice[ current_mmap_addr..current_mmap_addr + serialized_len] ,
203
204
) ;
204
205
current_mmap_addr += serialized_len;
205
- let slice_t = unsafe {
206
- std:: slice:: from_raw_parts ( v_serialized. as_ptr ( ) as * const T , dimension as usize )
207
- } ;
206
+ let slice_t =
207
+ unsafe { std:: slice:: from_raw_parts ( v_serialized. as_ptr ( ) as * const T , dimension) } ;
208
208
log:: trace!(
209
209
"deserialized v : {:?} address : {:?} " ,
210
210
slice_t,
@@ -223,7 +223,7 @@ impl DataMap {
223
223
distname,
224
224
} ;
225
225
//
226
- return Ok ( datamap) ;
226
+ Ok ( datamap)
227
227
} // end of from_datas
228
228
229
229
//
@@ -252,14 +252,14 @@ impl DataMap {
252
252
let datat_name_arg_last = datat_name_vec. last ( ) . unwrap ( ) ;
253
253
//
254
254
if datat_name_arg_last == tname_last {
255
- return true ;
255
+ true
256
256
} else {
257
257
log:: info!(
258
258
"data type in DataMap : {}, type arg = {}" ,
259
259
tname_last,
260
260
datat_name_arg_last
261
261
) ;
262
- return false ;
262
+ false
263
263
}
264
264
} // end of check_data_type
265
265
@@ -272,12 +272,9 @@ impl DataMap {
272
272
pub fn get_data < ' a , T : Clone + std:: fmt:: Debug > ( & ' a self , dataid : & DataId ) -> Option < & ' a [ T ] > {
273
273
//
274
274
log:: trace!( "in DataMap::get_data, dataid : {:?}" , dataid) ;
275
- let address = self . hmap . get ( dataid) ;
276
- if address. is_none ( ) {
277
- return None ;
278
- }
279
- log:: debug!( " adress for id : {}, address : {:?}" , dataid, address) ;
280
- let mut current_mmap_addr = * address. unwrap ( ) ;
275
+ let address = self . hmap . get ( dataid) ?;
276
+ log:: debug!( " address for id : {}, address : {:?}" , dataid, address) ;
277
+ let mut current_mmap_addr = * address;
281
278
let mapped_slice = self . mmap . as_slice ( ) ;
282
279
let mut u64_slice = [ 0u8 ; std:: mem:: size_of :: < u64 > ( ) ] ;
283
280
u64_slice. copy_from_slice (
@@ -289,7 +286,7 @@ impl DataMap {
289
286
let slice_t = unsafe {
290
287
std:: slice:: from_raw_parts (
291
288
mapped_slice[ current_mmap_addr..] . as_ptr ( ) as * const T ,
292
- self . dimension as usize ,
289
+ self . dimension ,
293
290
)
294
291
} ;
295
292
Some ( slice_t)
@@ -303,12 +300,12 @@ impl DataMap {
303
300
304
301
/// returns full data type name
305
302
pub fn get_data_typename ( & self ) -> String {
306
- return self . t_name . clone ( ) ;
303
+ self . t_name . clone ( )
307
304
}
308
305
309
306
/// returns the distance name
310
307
pub fn get_distname ( & self ) -> String {
311
- return self . distname . clone ( ) ;
308
+ self . distname . clone ( )
312
309
}
313
310
314
311
/// return the number of data in mmap
@@ -382,7 +379,7 @@ mod tests {
382
379
}
383
380
//
384
381
// now that we have checked that the datamap seems ok, test reload of hnsw with mmap
385
- let datamap: DataMap = DataMap :: from_hnswdump :: < f32 > ( "." , & fname) . unwrap ( ) ;
382
+ let datamap: DataMap = DataMap :: from_hnswdump :: < f32 > ( & Path :: new ( "." ) , & fname) . unwrap ( ) ;
386
383
let nb_test = 30 ;
387
384
log:: info!( "checking random access of id , nb test : {}" , nb_test) ;
388
385
for _ in 0 ..nb_test {
@@ -439,7 +436,8 @@ mod tests {
439
436
let directory = PathBuf :: from ( "." ) ;
440
437
let _res = hnsw. file_dump ( & directory, & fname) ;
441
438
// now that we have checked that the datamap seems ok, test reload of hnsw with mmap
442
- let datamap: DataMap = DataMap :: from_hnswdump :: < u32 > ( "." , & fname. to_string ( ) ) . unwrap ( ) ;
439
+ let datamap: DataMap =
440
+ DataMap :: from_hnswdump :: < u32 > ( & directory. as_path ( ) , & fname. to_string ( ) ) . unwrap ( ) ;
443
441
// testing type check
444
442
assert ! ( datamap. check_data_type:: <u32 >( ) ) ;
445
443
assert ! ( !datamap. check_data_type:: <f32 >( ) ) ;
0 commit comments