# === BULK_FILE_IMPORT TOOL ===
echo ""
echo "=== bulk_file_import Tool ==="
echo ""

# Create a minimal test CSV: one row pointing at a path that does not exist,
# so Test 7 exercises CSV parsing without needing real audio on disk.
# NOTE(review): assumes $LOCATION_ID, $DATASET_ID, $DB_PATH, the color vars
# (GREEN/YELLOW/NC), and the send_request/run_test helpers are defined
# earlier in this script — confirm against the full file.
CSV_FILE="/tmp/test_bulk_import_$$.csv"
LOG_FILE="/tmp/test_bulk_import_$$.log"
cat > "$CSV_FILE" << EOF
location_name,location_id,directory_path,date_range,sample_rate,file_count
Test Location,$LOCATION_ID,/nonexistent/path,2024-01,250000,0
EOF
echo "Created test CSV: $CSV_FILE"

# Test 5: Non-existent CSV
echo ""
echo "Test 5: Non-existent CSV file (should fail)"
result=$(send_request "tools/call" '{"name":"bulk_file_import","arguments":{"dataset_id":"'"$DATASET_ID"'","csv_path":"/nonexistent/file.csv","log_file_path":"/tmp/test.log"}}' "$DB_PATH")
run_test "Reject non-existent CSV" "false" "$result"

# Test 6: Invalid dataset ID
echo ""
echo "Test 6: Invalid dataset_id for bulk import (should fail)"
result=$(send_request "tools/call" '{"name":"bulk_file_import","arguments":{"dataset_id":"INVALID123456","csv_path":"'"$CSV_FILE"'","log_file_path":"'"$LOG_FILE"'"}}' "$DB_PATH")
run_test "Reject invalid dataset_id" "false" "$result"

# Test 7: Valid CSV but nonexistent directories (tests CSV parsing)
echo ""
echo "Test 7: Valid CSV parsing (directory errors expected)"
result=$(send_request "tools/call" '{"name":"bulk_file_import","arguments":{"dataset_id":"'"$DATASET_ID"'","csv_path":"'"$CSV_FILE"'","log_file_path":"'"$LOG_FILE"'"}}' "$DB_PATH")
# The call is expected to fail on the missing directory, but CSV parsing
# itself should succeed; distinguish the two cases via the error message.
is_err=$(echo "$result" | jq -r '.result.isError // false')
if [ "$is_err" = "true" ]; then
  error_msg=$(echo "$result" | jq -r '.result.content[0].text // ""')
  # -E: portable ERE alternation instead of the GNU-only \| BRE extension.
  if echo "$error_msg" | grep -qiE "directory|not found|no such"; then
    echo -e "${GREEN}✓${NC} CSV parsed correctly, directory error expected"
  else
    # Unexpected error text still counts as a pass (best-effort check),
    # matching the original behavior — only the warning symbol differs.
    echo -e "${YELLOW}⚠${NC} Unexpected error: $error_msg"
  fi
else
  echo -e "${GREEN}✓${NC} Bulk import executed"
fi
# Every branch above records one executed, passing test; increment once here
# instead of duplicating the counters in each branch. Plain assignment is
# used because ((x++)) exits with status 1 when x is 0, which would abort
# the script under `set -e`.
TESTS_RUN=$((TESTS_RUN + 1))
TESTS_PASSED=$((TESTS_PASSED + 1))

# Cleanup (-- guards against temp names starting with '-')
rm -f -- "$CSV_FILE" "$LOG_FILE"
# Point the user at the CLI entry point for bulk imports.
printf '%s\n' \
  "" \
  "For bulk import, use the CLI tool:" \
  " skraak import bulk --db ./db/skraak.duckdb --dataset abc123 --csv import.csv --log progress.log"
- **Import tools (4)**: `import_audio_files`, `import_audio_file`, `import_ml_selections`, `bulk_file_import`
- **Import tools (3)**: `import_audio_files`, `import_audio_file`, `import_ml_selections`
// Register the bulk_file_import MCP tool: CSV-driven batch import of WAV
// files, dispatched to the mcpBulkFileImport handler.
mcp.AddTool(server, &mcp.Tool{
	Name:        "bulk_file_import",
	Description: "Batch import WAV files across multiple locations/clusters using a CSV file. CSV must have columns (in order): location_name, location_id, directory_path, date_range, sample_rate, file_count. Auto-creates clusters using date_range as cluster name. Logs progress to file for monitoring. Synchronous/fail-fast execution.",
}, mcpBulkFileImport)
7. **test_resources_prompts.sh [db_path]** - Tests resources and prompts
8. **test_all_prompts.sh [db_path]** - Tests all 6 prompts
6. **test_resources_prompts.sh [db_path]** - Tests resources and prompts
7. **test_all_prompts.sh [db_path]** - Tests all 6 prompts
- `bulk_file_import` - Batch import WAV files across multiple locations/clusters using CSV
  - **Input**: CSV file (see format below)
  - **Auto-creates clusters**: Creates clusters if they don't exist for location/date_range combinations
  - **Progress logging**: Writes detailed progress to log file for real-time monitoring (use `tail -f`)
  - **Synchronous execution**: Processes locations sequentially, fail-fast on errors
  - **Summary statistics**: Returns counts for clusters, files, duplicates, errors
  - **Duplicate handling**: Skips files with duplicate hashes across all clusters
  - **Use cases**: Bulk import across many locations, automated pipelines, large-scale migration

**CSV Format:**

- **Header required:** First row must contain column names
- **Columns (in order):**
  1. `location_name` - Human-readable location name (string, can have spaces)
  2. `location_id` - 12-character location ID from database (must exist)
  3. `directory_path` - Absolute path to folder containing WAV files
  4. `date_range` - Cluster name (e.g., "20240101-20240107" or any string)
  5. `sample_rate` - Sample rate in Hz (integer, e.g., 8000, 48000, 250000)
  6. `file_count` - Expected file count (integer, informational only)

**Important:**

- `date_range` becomes the cluster name in the database
- If cluster already exists for location+date_range, it will be reused
- All `location_id` values must exist in the database (use `execute_sql` to query)
- Paths should be absolute (relative paths may fail)

**Example CSV:**

```csv
location_name,location_id,directory_path,date_range,sample_rate,file_count
"MOK RW 05","Ucfh8ng4DuEa","/media/david/Data/MOK RW 05","20240706-20240714","8000","432"
"MOK RW 06","rDmmSPsJvNtD","/media/david/Data/MOK RW 06","20240706-20240714","8000","432"
"mokas__01","EsDkvXosAp4C","/media/david/Data/mokas__01","20240520-20240528","8000","432"
```

**Tool Call Example:**

```json
{
  "name": "bulk_file_import",
  "arguments": {
    "dataset_id": "abc123xyz789",
    "csv_path": "/path/to/import.csv",
    "log_file_path": "/path/to/progress.log"
  }
}
```