UYYSWKUVHPSAX4KIQZSKDW7HB7EHWEJ2Q4EKE4MCYL5REWXVX42QC /// Iterator over ngrams analogous to those used by scikit-learn's HashingVectorizer
impl TfidfTransformer {/// Constructs a new `TfidfTransformer`////// # Arguments////// * `smooth_idf` - Adds a 1 to the numerator and denominator of the IDF-term. Prevents zero divisionspub fn new(smooth_idf: bool) -> Self {Self {smooth_idf,idf: None,}}/// Fit the transformer to the input matrixpub fn fit(&mut self, matrix: &Array<f64, Ix2>) {// We need the number of documents to calculate the idflet mut n_samples = matrix.shape()[0];// Iterate over every column (Axis 1) of the matrix and count the occurence of each term, meaning +1 for every document where// the value is != 0let df_iter = matrix.axis_iter(Axis(1)).map(|r| r.fold(0.0, |acc, &cur| if cur != 0.0 { acc + 1. } else { acc }));let mut idf_vector: Array<f64, _> = ArrayBase::from_iter(df_iter);println!("idf vector: {:?}", idf_vector);// Apply smoothing if desired. Acts as if there is one document containing every termif self.smooth_idf {idf_vector += 1.;n_samples += 1;}println!("idf vector sm: {:?}", idf_vector);// Calculate idf from dfidf_vector.mapv_inplace(|df| (n_samples as f64 / df).log10() + 1.0);self.idf = Some(idf_vector);}/// Transform the frequency matrix to a TF-IDF representationpub fn transform(&self, mut matrix: Array2<f64>) -> Array2<f64> {for mut row in matrix.axis_iter_mut(Axis(0)) {azip!((tf in &mut row, &idf in self.idf.as_ref().unwrap()) *tf = *tf * idf);let norm = row.norm();row /= norm;}matrix}pub fn idf(&self) -> Option<&Array1<f64>> {self.idf.as_ref()}}/// Iterator over n-grams analogous to those used by scikit-learn's HashingVectorizer
assert_eq!(t, a);
assert_eq!(t, r);}#[test]fn tfidf_transformation_test() {let v = HashingVectorizer::new(16, 5, 5, false);let mut tf = TfidfTransformer::new(true);let input = ["This is the first document.","This document is the second document.","And this is the third one.","Is this the first document?",];let t = array![[0.,0.,-0.3726943,-0.3726943,0.30470201,0.,0.,-0.30470201,-0.30470201,0.,0.,-0.30470201,-0.46035161,0.,-0.3726943,0.],[0.,0.3506238,-0.44759726,-0.44759726,0.18297004,0.27643583,0.,0.18297004,0.18297004,0.,-0.22379863,0.,-0.27643583,0.,-0.22379863,-0.3506238],[-0.76438624,0.,0.,0.,0.19944423,0.30132545,0.,-0.19944423,0.,0.,0.,-0.19944423,0.,0.38219312,-0.24394892,0.],[0.,0.,-0.75564616,-0.37782308,0.30889513,0.,0.,-0.30889513,-0.30889513,0.,0.,0.,0.,0.,0.,0.]];/*let t = array![[0.,0.,-0.35355339,-0.35355339,0.35355339,0.,0.,-0.35355339,-0.35355339,0.,0.,-0.35355339,-0.35355339,0.,-0.35355339,0.]];*/let r = v.fit_transform(&input);println!("{:?}", r);tf.fit(&r);println!("idf: {:?}", tf.idf());let r = tf.transform(r);assert_eq!(t, r);