Q4YXRA3T5JUJEAGNLAY2OPFBO6Q6CH43P7KXQKKINZX6RT2PRM3QC VBFPFPJ4UR2JASCF7MVZDM3DC5XKITAFCPROT64TIN72KKQMO53AC PNAXYHCI5CUWXLZPF5SCODY5X6OIMSHR45O2LJIF6ICSBANTUXZQC SB4FZEB6ZLUHQNM3M76OGNNJY6THOF55S6JO6Q7IGXWE7OA7INFAC DBOROCRFD6A5SJBMFYFEJI5S5M77X4EFEK6KDQWA5QDMQJKIHRWQC RUF5K5CL542GK5UIIIBHPIMGGCXU72IWS5OFBVTI5DRX36OSPJDAC MNDAPW6MTTHAQ2RQFXRD4ZGX64ZM45MEU2CYVSSLWENA73KXZBEAC 7CC2YVZXAIUNWXNNVIO5KOZZFDQQLESFO72SGEDP2C4OZXAWO4KQC BD4VOH6J37BTVCLZB5WOBDTOQN2UO3NHLRMEUQVVGB3PXV4WJZIAC Y5RSXHAZFGNPQ26DRXTCHJV4Y4EZLEGFC2FBRBI5MCMOOYCB7CMAC 3ZSBMHZBDIR3SABGTLQVBGMC2YQCRM4XNVDYGSASBF6FXYJ4J74QC MF6A3QKQ6SU7PYAHJOO7YFCRB47DO7FHRA7OIZ7KHQJYJR77PAIQC }}func TestSampling(t *testing.T) {makeSegs := func(n int) []*utils.Segment {s := make([]*utils.Segment, n)for i := range s {s[i] = &utils.Segment{StartTime: float64(i), EndTime: float64(i + 1)}}return s}df1 := &utils.DataFile{FilePath: "/test/f1.data", Segments: makeSegs(6)}df2 := &utils.DataFile{FilePath: "/test/f2.data", Segments: makeSegs(4)}kept := []*utils.DataFile{df1, df2}cached := [][]*utils.Segment{df1.Segments, df2.Segments}countTotal := func(c [][]*utils.Segment) int {n := 0for _, s := range c {n += len(s)}return n}// 50% of 10 → 5k, c := applySampling(kept, cached, 50, rand.New(rand.NewSource(42)))if got := countTotal(c); got != 5 {t.Errorf("sample 50%%: expected 5, got %d", got)}// Files must be in original chronological orderfor i := 1; i < len(k); i++ {if k[i].FilePath < k[i-1].FilePath {t.Errorf("sample 50%%: files out of order at index %d", i)}
// 10% of 10 → 1_, c2 := applySampling(kept, cached, 10, rand.New(rand.NewSource(42)))if got := countTotal(c2); got != 1 {t.Errorf("sample 10%%: expected 1, got %d", got)}// 1% of 10 → clamp to 1_, c3 := applySampling(kept, cached, 1, rand.New(rand.NewSource(42)))if got := countTotal(c3); got != 1 {t.Errorf("sample 1%%: expected 1 (clamped), got %d", got)}// 99% of 10 → 9_, c4 := applySampling(kept, cached, 99, rand.New(rand.NewSource(42)))if got := countTotal(c4); got != 9 {t.Errorf("sample 99%%: expected 9, got %d", got)}
// applySampling randomly selects sample% of segments from the filtered set.// The returned files and segments preserve the original chronological order.func applySampling(kept []*utils.DataFile, cachedSegs [][]*utils.Segment, sample int, rng *rand.Rand) ([]*utils.DataFile, [][]*utils.Segment) {flat := make([]struct{ fileIdx, segIdx int }, 0)for fi, segs := range cachedSegs {for si := range segs {flat = append(flat, struct{ fileIdx, segIdx int }{fi, si})}}
targetCount := len(flat) * sample / 100if targetCount < 1 {targetCount = 1}rng.Shuffle(len(flat), func(i, j int) { flat[i], flat[j] = flat[j], flat[i] })selected := flat[:targetCount]// Restore chronological order before rebuildingsort.Slice(selected, func(i, j int) bool {if selected[i].fileIdx != selected[j].fileIdx {return selected[i].fileIdx < selected[j].fileIdx}return selected[i].segIdx < selected[j].segIdx})newCached := make([][]*utils.Segment, len(cachedSegs))for _, ref := range selected {newCached[ref.fileIdx] = append(newCached[ref.fileIdx], cachedSegs[ref.fileIdx][ref.segIdx])}var newKept []*utils.DataFilevar finalCached [][]*utils.Segmentfor i, segs := range newCached {if len(segs) > 0 {newKept = append(newKept, kept[i])finalCached = append(finalCached, segs)}}return newKept, finalCached}
	// --sample <n>: percentage of segments to keep. Validates that a
	// value is present, is an integer, and lies in 1..100; any failure
	// prints an error to stderr and exits with status 1.
	case "--sample":
		if i+1 >= len(args) {
			fmt.Fprintf(os.Stderr, "Error: --sample requires a value\n")
			os.Exit(1)
		}
		v, err := strconv.Atoi(args[i+1])
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error: --sample must be an integer\n")
			os.Exit(1)
		}
		if v <= 0 || v > 100 {
			fmt.Fprintf(os.Stderr, "Error: --sample must be between 1 and 100\n")
			os.Exit(1)
		}
		sample = v
		// Advance past both the flag and its value.
		i += 2