#!/usr/bin/env bb
;; xm-tool-tracker.bb - Tool invocation tracking with interruption detection
;; Seed 1069: [+1, -1, -1, +1, +1, +1, +1]
;;
;; Resolution sufficient to detect when a tool invocation was interrupted:
;; - Parses history for antml function_calls and invoke patterns
;; - Tracks start timestamps from invocation
;; - Tracks end timestamps from function_results
;; - Marks orphaned invocations as interrupted
;;
;; Usage:
;;   ./xm-tool-tracker.bb extract          # Extract tool invocations from history
;;   ./xm-tool-tracker.bb detect           # Detect interrupted invocations
;;   ./xm-tool-tracker.bb stats            # Show tool completion stats
;;   ./xm-tool-tracker.bb pending          # Show pending invocations
;;   ./xm-tool-tracker.bb mark-interrupted # Mark stale pending as interrupted
;;   ./xm-tool-tracker.bb resolution       # Show temporal resolution analysis

(require '[babashka.process :refer [shell]]
         '[clojure.string :as str]
         '[cheshire.core :as json]
         '[babashka.fs :as fs])

;;; ============================================================
;;; Configuration
;;; ============================================================

;; Path of the DuckDB database file handed to the duckdb CLI: the XM_DB_PATH
;; environment variable when it is set, otherwise xm.duckdb in the user's home.
(def XM_DB_PATH
  (if-let [from-env (System/getenv "XM_DB_PATH")]
    from-env
    (str (System/getProperty "user.home") "/xm.duckdb")))

;; Source keyword -> history.jsonl path, one dot-directory per tool under the
;; user's home directory. Kept as a literal map so the entry order is unchanged.
(def HISTORY_SOURCES
  (let [home (System/getProperty "user.home")
        history-in (fn [dir-name] (str home "/." dir-name "/history.jsonl"))]
    {:claude (history-in "claude")
     :duck (history-in "duck")
     :codex (history-in "codex")
     :postduck (history-in "postduck")
     :postpostduck (history-in "postpostduck")
     :preprepreduck (history-in "preprepreduck")
     :postcodex (history-in "postcodex")}))

;; Interruption threshold in minutes.
;; cmd-mark-interrupted flips 'pending' rows whose start_ts is older than this
;; many minutes to 'interrupted'; cmd-detect only prints the value.
(def INTERRUPTION_THRESHOLD_MINUTES 5)

;; GF(3) trit assigned to each class of tool operation
(def TRIT_MINUS -1)   ;; Read-style operations
(def TRIT_ERGODIC 0)  ;; Transform-style operations
(def TRIT_PLUS 1)     ;; Create-style operations

;; Tool name -> trit lookup table, written as one tool list per trit.
;; Tools that are not listed here are given TRIT_ERGODIC by record-invocations.
(def TOOL_TRITS
  (into {}
        (for [[trit tool-names] [[TRIT_MINUS ["Read" "Grep" "Glob" "WebFetch" "WebSearch"
                                              "KillShell" "TaskOutput"]]
                                 [TRIT_ERGODIC ["Edit" "NotebookEdit" "TodoWrite"
                                                "AskUserQuestion" "EnterPlanMode" "ExitPlanMode"]]
                                 [TRIT_PLUS ["Write" "Bash" "Task" "Skill"]]]
              tool-name tool-names]
          [tool-name trit])))

;;; ============================================================
;;; DuckDB Integration
;;; ============================================================

(defn duck-query
  "Run `sql` through the duckdb CLI in JSON mode against XM_DB_PATH.

   Returns {:ok true :data rows :raw stdout} when duckdb exits with 0, where
   rows is always a vector of keyword-keyed maps (empty when stdout is not a
   JSON array, so callers can iterate :data without type checks; the
   unparsed text stays available under :raw).
   Returns {:ok false :error stderr} when duckdb exits non-zero."
  [sql]
  (let [{:keys [exit out err]} (shell {:out :string :err :string :continue true}
                                      "duckdb" XM_DB_PATH "-json" "-c" sql)]
    (if (zero? exit)
      {:ok true
       ;; Realise the rows inside the try: cheshire may parse a top-level
       ;; array lazily, so a malformed tail would otherwise throw later.
       :data (try
               (let [parsed (json/parse-string out true)]
                 (if (sequential? parsed) (vec parsed) []))
               (catch Exception _ []))
       :raw out}
      {:ok false :error err})))

(defn duck-exec
  "Run a statement that yields no result set through the duckdb CLI against
   XM_DB_PATH.

   Returns {:ok boolean :error stderr-or-nil}; :error is only populated when
   duckdb exits non-zero."
  [sql]
  (let [{:keys [exit err]} (shell {:out :string :err :string :continue true}
                                  "duckdb" XM_DB_PATH "-c" sql)
        ok? (zero? exit)]
    {:ok ok?
     :error (when-not ok? err)}))

;;; ============================================================
;;; Tool Invocation Pattern Extraction
;;; ============================================================

(defn extract-tool-calls
  "Find tool invocations in a history entry's text.

   Scans `content` for invoke tags carrying a name attribute and for a
   function_results or result tag anywhere in the same text.

   Returns nil when `content` is not a string or contains no invoke tag,
   otherwise a map with
     :tools        vector of tool names in order of appearance
     :has_results  true when any result tag is present in the text (one flag
                   for the whole entry, not per tool)
     :count        number of invoke tags found"
  [content]
  ;; string? also covers nil; non-string values would make re-seq throw.
  (when (string? content)
    (let [tools (mapv second (re-seq #"<invoke\s+name=\"([^\"]+)\">" content))]
      (when (seq tools)
        {:tools tools
         :has_results (boolean (re-find #"<function_results>|<result>" content))
         :count (count tools)}))))

(defn parse-history-line
  "Parse one JSONL history line and summarise the tool invocations in it.

   `line` is the raw JSON text; `source` is the keyword of the history source
   the line came from. The entry text is taken from the first present of
   :display, :text, :content, and the timestamp from :timestamp or :ts.

   Returns nil when the line has no tool invocations or cannot be processed
   (malformed JSON and the like; skipping such lines is deliberate), otherwise
   a map with :timestamp, :session_id, :source (as a string), :tools,
   :has_results, :tool_count and :raw_content (first 500 characters at most)."
  [line source]
  (try
    (let [entry (json/parse-string line true)
          content (or (:display entry) (:text entry) (:content entry) "")]
      (when-let [{:keys [tools has_results count]} (extract-tool-calls content)]
        {:timestamp (or (:timestamp entry) (:ts entry))
         :session_id (:session_id entry)
         :source (name source)
         :tools tools
         :has_results has_results
         :tool_count count
         ;; `count` is shadowed by the destructured key above, hence the
         ;; fully qualified call.
         :raw_content (subs content 0 (min 500 (clojure.core/count content)))}))
    (catch Exception _ nil)))

(defn process-history-file
  "Read the history file at `path` and collect its tool invocations.

   Prints a progress line, then runs parse-history-line over every line.
   Returns a vector of the non-nil results, or nil when the file does not
   exist."
  [source path]
  (when (fs/exists? path)
    (println (str "  Processing " (name source) ": " path))
    (with-open [rdr (clojure.java.io/reader path)]
      ;; into with a transducer consumes every line before the reader closes.
      (into [] (keep #(parse-history-line % source)) (line-seq rdr)))))

;;; ============================================================
;;; Invocation Recording
;;; ============================================================

(defn- sql-quote
  "Render `v` as a single-quoted SQL string literal, doubling embedded quotes."
  [v]
  (str "'" (str/replace (str v) "'" "''") "'"))

(defn record-invocations
  "Insert one xm_tool_invocations row per tool named in `invocations`.

   `invocations` is a seq of maps as produced by parse-history-line. Ids are
   allocated consecutively starting at MAX(invocation_id) + 1 (1 when that
   cannot be read). A row is 'complete' when its entry had results, 'pending'
   otherwise; tools missing from TOOL_TRITS get TRIT_ERGODIC.

   Every value taken from the history is quoted with embedded single quotes
   doubled, since it comes from files this script does not control.
   NOTE(review): the timestamp is inserted as a quoted literal as before;
   confirm start_ts accepts the format the history files use.

   Returns nil. Failed inserts are counted and reported on stderr."
  [invocations]
  (let [first-id (or (-> (duck-query "SELECT COALESCE(MAX(invocation_id), 0) + 1 as next_id FROM xm_tool_invocations")
                         :data first :next_id)
                     1)
        ;; One element per (entry, tool) pair, in input order.
        rows (for [{:keys [tools] :as invocation} invocations
                   tool tools]
               (assoc invocation :tool tool))
        failures (reduce
                  (fn [failed [offset {:keys [timestamp session_id source tool has_results]}]]
                    (let [id (+ first-id offset)
                          trit (get TOOL_TRITS tool TRIT_ERGODIC)
                          status (if has_results "complete" "pending")
                          correlation-id (str session_id "-" timestamp "-" tool)
                          sql (str "INSERT INTO xm_tool_invocations "
                                   "(invocation_id, tool_name, start_ts, status, correlation_id, source, session_id, trit) "
                                   "VALUES (" id ", " (sql-quote tool) ", "
                                   (if timestamp (sql-quote timestamp) "CURRENT_TIMESTAMP") ", "
                                   (sql-quote status) ", "
                                   (sql-quote correlation-id) ", "
                                   (sql-quote source) ", "
                                   (if session_id (sql-quote session_id) "NULL") ", "
                                   trit ")")]
                      (if (:ok (duck-exec sql)) failed (inc failed))))
                  0
                  (map-indexed vector rows))]
    (when (pos? failures)
      (binding [*out* *err*]
        (println (str "  Warning: " failures " insert(s) into xm_tool_invocations failed"))))))

;;; ============================================================
;;; Commands
;;; ============================================================

(defn cmd-extract
  "Rebuild xm_tool_invocations from every file in HISTORY_SOURCES.

   Deletes all existing rows, re-extracts invocations from each history file
   that exists, records them, then prints the top 15 rows of xm_tool_stats.
   A failed DELETE or stats query is reported instead of passing silently."
  []
  (println "Extracting tool invocations...")
  (println "  Seed 1069: [+1, -1, -1, +1, +1, +1, +1]")
  (println "")

  ;; Clear existing data
  (let [cleared (duck-exec "DELETE FROM xm_tool_invocations")]
    (when-not (:ok cleared)
      (println (str "  Warning: could not clear xm_tool_invocations: " (:error cleared)))))

  ;; Eager transducer pipeline: files are processed (and their progress lines
  ;; printed) here, in HISTORY_SOURCES order; missing files contribute nothing.
  (let [all-invocations (into []
                              (mapcat (fn [[source path]]
                                        (process-history-file source path)))
                              HISTORY_SOURCES)]
    (println "")
    (println (str "  Found " (count all-invocations) " entries with tool invocations"))

    (when (seq all-invocations)
      (record-invocations all-invocations)
      (println "  Recorded to xm_tool_invocations"))

    ;; Show stats
    (let [stats (duck-query "SELECT * FROM xm_tool_stats ORDER BY total_calls DESC LIMIT 15")]
      (println "")
      (println "Tool Statistics:")
      (when (false? (:ok stats))
        (println (str "  Error: " (:error stats))))
      (doseq [row (:data stats)]
        (println (str "  " (:tool_name row) ": "
                      (:total_calls row) " calls, "
                      (:completed row) " complete, "
                      (:pending row) " pending"))))))

(defn cmd-detect
  "Print up to 20 rows of xm_interrupted_tools, longest-pending first.

   Each line shows tool name, minutes pending, source and the first 50
   characters of the tool parameters. The threshold is printed for
   information only; which rows qualify is decided by the view."
  []
  (println "Detecting interrupted invocations...")
  (println (str "  Threshold: " INTERRUPTION_THRESHOLD_MINUTES " minutes"))
  (println "")

  (let [rows (:data (duck-query "SELECT * FROM xm_interrupted_tools ORDER BY minutes_pending DESC LIMIT 20"))]
    (if (seq rows)
      (do
        (println (str "  Found " (count rows) " potentially interrupted invocations:"))
        (println "")
        (doseq [row rows]
          (let [;; %.1f renders nil as "n" and throws on integers, so coerce.
                minutes (double (or (:minutes_pending row) 0))
                params (str (or (:tool_params row) ""))]
            (println (str "  " (:tool_name row)
                          " | " (format "%.1f" minutes) " min pending"
                          " | " (:source row)
                          " | " (subs params 0 (min 50 (count params))))))))
      (println "  No interrupted invocations detected."))))

(defn cmd-stats
  "Print a per-tool table from xm_tool_stats followed by the number of rows
   in xm_tool_invocations for each GF(3) trit."
  []
  (println "Tool Completion Statistics")
  (println "  Seed 1069: [+1, -1, -1, +1, +1, +1, +1]")
  (println "")

  (let [stats (duck-query "SELECT * FROM xm_tool_stats ORDER BY total_calls DESC")]
    (println (format "  %-20s %8s %8s %8s %8s %10s"
                     "Tool" "Total" "Complete" "Pending" "Errors" "Avg(ms)"))
    ;; Build the divider with str: multi-argument println adds a space and
    ;; pushed the rule one column right of the table.
    (println (str "  " (str/join "" (repeat 70 "-"))))
    (doseq [row (:data stats)]
      (println (format "  %-20s %8d %8d %8d %8d %10.0f"
                       (:tool_name row)
                       (:total_calls row)
                       (:completed row)
                       (:pending row)
                       (or (:errors row) 0)
                       ;; %f rejects integers, so coerce whatever JSON gave us.
                       (double (or (:avg_duration_ms row) 0.0))))))

  ;; GF(3) trit distribution
  (println "")
  (let [trit-stats (duck-query "SELECT trit, COUNT(*) as count FROM xm_tool_invocations GROUP BY trit ORDER BY trit")]
    (println "GF(3) Trit Distribution:")
    (doseq [row (:data trit-stats)]
      (println (str "  " (case (:trit row) -1 "MINUS (-1)" 0 "ERGODIC (0)" 1 "PLUS (+1)" "?")
                    ": " (:count row))))))

(defn cmd-pending
  "Print the 30 most recently started invocations whose status is 'pending',
   with the number of minutes each has been pending."
  []
  (println "Pending Tool Invocations")
  (println "")

  (let [rows (:data (duck-query "SELECT tool_name, start_ts, source, session_id,
                              EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - start_ts)) / 60.0 as minutes_pending
                             FROM xm_tool_invocations 
                             WHERE status = 'pending'
                             ORDER BY start_ts DESC
                             LIMIT 30"))]
    (if (seq rows)
      (doseq [row rows]
        ;; The fallback 0 is a Long and %.1f throws on integers, so coerce to
        ;; double before formatting.
        (let [minutes (double (or (:minutes_pending row) 0))]
          (println (str "  " (:tool_name row)
                        " | " (format "%.1f" minutes) " min"
                        " | " (:source row)
                        " | " (:start_ts row)))))
      (println "  No pending invocations."))))

(defn cmd-mark-interrupted
  "Set status to 'interrupted' and end_ts to now on every 'pending' row whose
   start_ts is more than INTERRUPTION_THRESHOLD_MINUTES in the past.

   Prints how many rows matched that condition just before the update, i.e.
   the rows this run marks, rather than the table-wide total of interrupted
   rows accumulated over all runs."
  []
  (println (str "Marking invocations pending > " INTERRUPTION_THRESHOLD_MINUTES " minutes as interrupted..."))

  (let [stale-clause (str "status = 'pending'"
                          " AND start_ts < CURRENT_TIMESTAMP - INTERVAL '"
                          INTERRUPTION_THRESHOLD_MINUTES " minutes'")
        ;; Counted before the update; afterwards these rows are no longer
        ;; distinguishable from previously interrupted ones. `first` is used
        ;; rather than index 0 because the parsed rows may be a seq.
        stale-count (or (-> (duck-query (str "SELECT COUNT(*) as count FROM xm_tool_invocations WHERE " stale-clause))
                            :data first :count)
                        0)
        result (duck-exec (str "UPDATE xm_tool_invocations"
                               " SET status = 'interrupted',"
                               " end_ts = CURRENT_TIMESTAMP"
                               " WHERE " stale-clause))]
    (if (:ok result)
      (println (str "  Marked " stale-count " invocations as interrupted."))
      (println (str "  Error: " (:error result))))))

(defn cmd-resolution
  "Print the earliest and latest start_ts, the row count, and the number of
   distinct hours, minutes and seconds covered by xm_tool_invocations rows
   that have a start_ts. Prints the query error instead when the query fails."
  []
  (println "Temporal Resolution Analysis")
  (println "  Seed 1069: [+1, -1, -1, +1, +1, +1, +1]")
  (println "")

  (let [resolution (duck-query "SELECT 
    MIN(start_ts) as earliest,
    MAX(start_ts) as latest,
    COUNT(*) as total_invocations,
    COUNT(DISTINCT DATE_TRUNC('hour', start_ts)) as distinct_hours,
    COUNT(DISTINCT DATE_TRUNC('minute', start_ts)) as distinct_minutes,
    COUNT(DISTINCT DATE_TRUNC('second', start_ts)) as distinct_seconds
    FROM xm_tool_invocations
    WHERE start_ts IS NOT NULL")
        row (first (:data resolution))]
    (if (false? (:ok resolution))
      (println (str "  Error: " (:error resolution)))
      (do
        (println (str "  Earliest: " (:earliest row)))
        (println (str "  Latest:   " (:latest row)))
        (println (str "  Total:    " (:total_invocations row) " invocations"))
        (println (str "  Distinct hours:   " (:distinct_hours row)))
        (println (str "  Distinct minutes: " (:distinct_minutes row)))
        (println (str "  Distinct seconds: " (:distinct_seconds row)))
        (println "")
        (println "  Resolution: Second-level granularity for interruption detection")))))

;;; ============================================================
;;; Main
;;; ============================================================

(defn print-usage
  "Print the command-line help text. The threshold shown for `detect` is taken
   from INTERRUPTION_THRESHOLD_MINUTES so the help cannot drift from the
   configured value."
  []
  ;; The leading and trailing empty strings reproduce the blank line before
  ;; and after the help text.
  (println
   (str/join
    "\n"
    [""
     "xm-tool-tracker.bb - Tool invocation tracking with interruption detection"
     "Seed 1069: [+1, -1, -1, +1, +1, +1, +1]"
     ""
     "Usage: xm-tool-tracker.bb [command]"
     ""
     "Commands:"
     "  extract            Extract tool invocations from all history sources"
     (str "  detect             Detect interrupted invocations (pending > "
          INTERRUPTION_THRESHOLD_MINUTES " min)")
     "  stats              Show tool completion statistics"
     "  pending            Show currently pending invocations"
     "  mark-interrupted   Mark stale pending as interrupted"
     "  resolution         Show temporal resolution analysis"
     ""
     "Resolution:"
     "  - Tracks tool invocations at second-level granularity"
     "  - Detects interruptions via orphaned start timestamps"
     "  - GF(3) trit classification: MINUS (read), ERGODIC (transform), PLUS (create)"
     ""])))

(defn -main
  "Command-line entry point: dispatch on the first argument.

   No argument, -h, --help or help prints the usage text. Any other
   unrecognised command prints an error plus the usage text and exits with
   status 1."
  [& args]
  (let [cmd (first args)
        handlers {"extract" cmd-extract
                  "detect" cmd-detect
                  "stats" cmd-stats
                  "pending" cmd-pending
                  "mark-interrupted" cmd-mark-interrupted
                  "resolution" cmd-resolution}
        help-flags #{nil "-h" "--help" "help"}]
    (cond
      (contains? handlers cmd)
      ((get handlers cmd))

      (contains? help-flags cmd)
      (print-usage)

      :else
      (do
        (println (str "Unknown command: " cmd))
        (print-usage)
        (System/exit 1)))))

;; Script entry: runs -main with the command-line arguments as soon as this
;; file is evaluated.
(apply -main *command-line-args*)