JTA5GVUCWCLJZVNG4FVXFLSV7HSRJQFHQ6CRURD37JGVFPUKMT7QC
// When the `normalise` flag is set, rescale `v` in place by dividing it by its own norm.
// NOTE(review): which norm `v.norm()` computes is not visible here — presumably Euclidean; confirm.
// NOTE(review): if the norm is 0 (e.g. an all-zero vector) this divides by zero; confirm that
// callers cannot reach this with such a vector, or guard against it.
if self.normalise {
let v_norm = v.norm();
v /= v_norm;
}
// The last entry of `row_idx` is the offset in `col_idx`/`values` where the current row begins.
// Panics if `row_idx` is empty — presumably it is seeded with an initial offset; confirm.
let start = *row_idx.last().unwrap();
// Sort and merge only the current row's tail of both buffers, in place; `row_size` is the
// count that `sort_dedup` returns for that tail.
let row_size = sort_dedup(
&mut col_idx[start..],
&mut values[start..],
&mut sorting_buf,
);
// `size` is the running total across rows.
size += row_size;
let end = start + row_size;
// Cut both buffers back to `end`, discarding whatever the merge left behind after the
// entries it kept.
col_idx.truncate(end);
values.truncate(end);
// Record the running total as the boundary where the next row will start.
row_idx.push(size);
/// Sorts the parallel `indices`/`values` slices by index and merges entries that share an
/// index by summing their values, writing the result back into the front of both slices.
///
/// `buf` is scratch space: it is cleared on entry and drained by the merge loop, so its
/// previous contents are discarded and only its allocation is reused between calls.
///
/// After the merge loop, `size` holds the number of distinct indices written; slots past the
/// first `size` positions are not overwritten and keep stale data.
/// NOTE(review): the return expression is outside the visible part of this function —
/// presumably it returns `size`; confirm.
///
/// # Panics
///
/// Panics if either slice is empty, because the first output slot is taken with `unwrap()`.
/// NOTE(review): confirm callers never pass an empty row, or handle that case explicitly.
fn sort_dedup(indices: &mut [usize], values: &mut [f64], buf: &mut Vec<(usize, f64)>) -> usize {
buf.clear();
buf.reserve_exact(indices.len());
// Copy the (index, value) pairs out so the slices can be overwritten while merging.
// `zip` stops at the shorter slice, so any surplus entries in the longer one are ignored.
buf.extend(indices.iter().zip(values.iter()).map(|(&i, &v)| (i, v)));
// Sort the pairs by index. The sort is unstable, so the relative order of pairs with the
// same index is unspecified; they are summed below, so only the summation order is affected.
buf.sort_unstable_by_key(|&(i, _)| i);
// Merge runs of equal indices. `last` is the index most recently written (None before the
// first pair), and `target` is the output slot currently being filled.
let mut last = None;
let mut target_iter = indices.iter_mut().zip(values.iter_mut());
// Panics here when there is no first slot, i.e. when either slice is empty.
let mut target = target_iter.next().unwrap();
let mut size = 0;
for (idx, val) in buf.drain(..) {
match last {
Some(li) => {
if idx == li {
// Same index as the slot being filled: accumulate into it.
*target.1 += val;
} else {
// New index: advance to the next output slot and start a fresh entry there.
// There are never more distinct indices than copied pairs, so a slot is available.
target = target_iter.next().unwrap();
size += 1;
*target.0 = idx;
*target.1 = val;
last = Some(idx);
}
}
None => {
// Very first pair: it goes into the slot that was taken before the loop.
*target.0 = idx;
*target.1 = val;
last = Some(idx);
size += 1;
}
}
}
// Iterate over every column (Axis 1) of the matrix and count the occurrences of each term,
// i.e. add 1 for every document in which the term's value is != 0
let df_iter = matrix
.axis_iter(Axis(1))
.map(|r| r.fold(0.0, |acc, &cur| if cur != 0.0 { acc + 1. } else { acc }));
let mut idf_vector: Array<f64, _> = ArrayBase::from_iter(df_iter);
let mut idf_vector = Array1::from_iter(df.into_iter());
let input = [
"This is the first document.",
"This document is the second document.",
"And this is the third one.",
"Is this the first document?",
];
let r = v.fit_transform(&input);
let r = v.fit_transform(&INPUT).to_dense();
name = "crossbeam-deque"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd"
dependencies = [
"cfg-if",
"crossbeam-utils",
"lazy_static",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
dependencies = [
"cfg-if",
"lazy_static",
]
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
version = "0.3.1"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6b19411a9719e753aff12e5187b74d60d3dc449ec3f4dc21e3989c3f554bc95"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-complex"
version = "0.4.0"
name = "rayon"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90"
dependencies = [
"autocfg",
"crossbeam-deque",
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"lazy_static",
"num_cpus",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
[[package]]
name = "smallvec"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
[[package]]
name = "sprs"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ea71e48b3eab4c4b153e8e35dcaeac132720809ef68359097b8cb54a18edd70"
dependencies = [
"alga",
"ndarray",
"num-complex 0.4.0",
"num-traits",
"num_cpus",
"rayon",
"smallvec",
]