(in-package :cl-user)
(defpackage slime-nregex
(:use #:common-lisp)
(:export
#:regex
#:regex-compile
))
(in-package :slime-nregex)
(eval-when (:compile-toplevel :load-toplevel :execute)
(defvar *regex-debug* nil) )
(defmacro info (message &rest args)
(if *regex-debug*
`(format *standard-output* ,message ,@args)))
(defvar *regex-groups* (make-array 10))
(defvar *regex-groupings* 0)
(defun regex (expression string)
"Usage: (regex <expression> <string)
This function will call regex-compile on the expression and then apply
the string to the returned lambda list."
(let ((findit (cond ((stringp expression)
(regex-compile expression))
((listp expression)
expression)))
(result nil))
(if (not (funcall (if (functionp findit)
findit
(eval `(function ,findit))) string))
(return-from regex nil))
(if (= *regex-groupings* 0)
(return-from regex t))
(dotimes (i *regex-groupings*)
(push (funcall 'subseq
string
(car (aref *regex-groups* i))
(cadr (aref *regex-groups* i)))
result))
(reverse result)))
(defvar *regex-special-chars* "?*+.()[]\\${}")
(defmacro add-exp (list)
"Add an item to the end of expression"
`(setf expression (append expression ,list)))
(defun regex-quoted (char-string &optional (invert nil))
"Usage: (regex-quoted <char-string> &optional invert)
Returns either the quoted character or a simple bit vector of bits set for
the matching values"
(let ((first (char char-string 0))
(result (char char-string 0))
(used-length 1))
(cond ((eql first #\n)
(setf result #\NewLine))
((eql first #\c)
(setf result #\Return))
((eql first #\t)
(setf result #\Tab))
((eql first #\d)
(setf result #*0000000000000000000000000000000000000000000000001111111111000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000))
((eql first #\D)
(setf result #*1111111111111111111111111111111111111111111111110000000000111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111))
((eql first #\w)
(setf result #*0000000000000000000000000000000000000000000000001111111111000000011111111111111111111111111000010111111111111111111111111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000))
((eql first #\W)
(setf result #*1111111111111111111111111111111111111111111111110000000000111111100000000000000000000000000111101000000000000000000000000001111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111))
((eql first #\b)
(setf result #*0000000001000000000000000000000011000000000010100000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000))
((eql first #\B)
(setf result #*1111111110111111111111111111111100111111111101011111111111011111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111))
((eql first #\s)
(setf result #*0000000001100000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000))
((eql first #\S)
(setf result #*1111111110011111111111111111111101111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111))
((and (>= (char-code first) (char-code #\0))
(<= (char-code first) (char-code #\9)))
(if (and (> (length char-string) 2)
(and (>= (char-code (char char-string 1)) (char-code #\0))
(<= (char-code (char char-string 1)) (char-code #\9))
(>= (char-code (char char-string 2)) (char-code #\0))
(<= (char-code (char char-string 2)) (char-code #\9))))
(progn
(setf result (do ((x 0 (1+ x))
(return 0))
((= x 2) return)
(setf return (+ (* return 8)
(- (char-code (char char-string x))
(char-code #\0))))))
(setf used-length 3))
(let ((group (- (char-code first) (char-code #\0))))
(setf result `((let ((nstring (subseq string (car (aref *regex-groups* ,group))
(cadr (aref *regex-groups* ,group)))))
(if (< length (+ index (length nstring)))
(return-from compare nil))
(if (not (string= string nstring
:start1 index
:end1 (+ index (length nstring))))
(return-from compare nil)
(incf index (length nstring)))))))))
(t
(setf result first)))
(if (and (vectorp result) invert)
(bit-xor result #*1111111110011111111111111111111101111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 t))
(values result used-length)))
(defun regex-compile (source &key (anchored nil))
"Usage: (regex-compile <expression> [ :anchored (t/nil) ])
This function take a regular expression (supplied as source) and
compiles this into a lambda list that a string argument can then
be applied to. It is also possible to compile this lambda list
for better performance or to save it as a named function for later
use"
(info "Now entering regex-compile with \"~A\"~%" source)
(let ((expression '()) (group 1) (group-stack nil) (result nil) (fast-first nil)) (if (= (length source) 0)
(return-from regex-compile
'(lambda (&rest args)
(declare (ignore args))
t)))
(cond ((eql (char source 0) #\^)
(setf source (subseq source 1))
(setf anchored t)))
(if (>= (length source) 2)
(if (string= source ".*" :start1 0 :end1 2)
(setf anchored t)))
(if (and (not anchored)
(not (position (char source 0) *regex-special-chars*))
(not (and (> (length source) 1)
(position (char source 1) *regex-special-chars*))))
(setf fast-first `((if (not (dotimes (i length nil)
(if (eql (char string i)
,(char source 0))
(return (setf start i)))))
(return-from final-return nil)))))
(add-exp '((setf (aref *regex-groups* 0)
(list index nil))))
(do ((eindex 0 (1+ eindex)))
((= eindex (length source)))
(let ((current (char source eindex)))
(info "Now processing character ~A index = ~A~%" current eindex)
(case current
((#\.)
(add-exp '((if (>= index length)
(return-from compare nil)
(incf index)))))
((#\$)
(if (= eindex (1- (length source)))
(add-exp '((if (not (= index length))
(return-from compare nil))))
(add-exp '((if (not (and (< index length)
(eql (char string index) #\$)))
(return-from compare nil)
(incf index))))))
((#\*)
(add-exp '(ASTRISK)))
((#\+)
(add-exp '(PLUS)))
((#\?)
(add-exp '(QUESTION)))
((#\()
(incf group)
(push group group-stack)
(add-exp `((setf (aref *regex-groups* ,(1- group))
(list index nil))))
(add-exp `(,group)))
((#\))
(let ((group (pop group-stack)))
(add-exp `((setf (cadr (aref *regex-groups* ,(1- group)))
index)))
(add-exp `(,(- group)))))
((#\[)
(let* ((invert (eql (char source (1+ eindex)) #\^))
(bitstring (make-array 256 :element-type 'bit
:initial-element
(if invert 1 0)))
(set-char (if invert 0 1)))
(if invert (incf eindex))
(do ((x (1+ eindex) (1+ x)))
((eql (char source x) #\]) (setf eindex x))
(info "Building range with character ~A~%" (char source x))
(cond ((and (eql (char source (1+ x)) #\-)
(not (eql (char source (+ x 2)) #\])))
(if (>= (char-code (char source x))
(char-code (char source (+ 2 x))))
(error "Invalid range \"~A-~A\". Ranges must be in acending order"
(char source x) (char source (+ 2 x))))
(do ((j (char-code (char source x)) (1+ j)))
((> j (char-code (char source (+ 2 x))))
(incf x 2))
(info "Setting bit for char ~A code ~A~%" (code-char j) j)
(setf (sbit bitstring j) set-char)))
(t
(cond ((not (eql (char source x) #\]))
(let ((char (char source x)))
(if (eql (char source x) #\\ )
(let ((length))
(multiple-value-setq (char length)
(regex-quoted (subseq source x) invert))
(incf x length)))
(info "Setting bit for char ~A code ~A~%" char (char-code char))
(if (not (vectorp char))
(setf (sbit bitstring (char-code (char source x))) set-char)
(bit-ior bitstring char t))))))))
(add-exp `((let ((range ,bitstring))
(if (>= index length)
(return-from compare nil))
(if (= 1 (sbit range (char-code (char string index))))
(incf index)
(return-from compare nil)))))))
((#\\ )
(let ((length)
(value))
(multiple-value-setq (value length)
(regex-quoted (subseq source (1+ eindex)) nil))
(cond ((listp value)
(add-exp value))
((characterp value)
(add-exp `((if (not (and (< index length)
(eql (char string index)
,value)))
(return-from compare nil)
(incf index)))))
((vectorp value)
(add-exp `((let ((range ,value))
(if (>= index length)
(return-from compare nil))
(if (= 1 (sbit range (char-code (char string index))))
(incf index)
(return-from compare nil)))))))
(incf eindex length)))
(t
(let* ((lit "")
(term (dotimes (litindex (- (length source) eindex) nil)
(let ((litchar (char source (+ eindex litindex))))
(if (position litchar *regex-special-chars*)
(return litchar)
(progn
(info "Now adding ~A index ~A to lit~%" litchar
litindex)
(setf lit (concatenate 'string lit
(string litchar)))))))))
(if (= (length lit) 1)
(add-exp `((if (not (and (< index length)
(eql (char string index) ,current)))
(return-from compare nil)
(incf index))))
(progn
(if (or (eql term #\*)
(eql term #\+)
(eql term #\?))
(setf lit (subseq lit 0 (1- (length lit)))))
(add-exp `((if (< length (+ index ,(length lit)))
(return-from compare nil))
(if (not (string= string ,lit :start1 index
:end1 (+ index ,(length lit))))
(return-from compare nil)
(incf index ,(length lit)))))))
(incf eindex (1- (length lit))))))))
(add-exp '((setf (cadr (aref *regex-groups* 0))
index)))
(add-exp '((return-from final-return t)))
(setf result (copy-tree nil))
(setf expression (reverse expression))
(do ((elt 0 (1+ elt)))
((>= elt (length expression)))
(let ((piece (nth elt expression)))
(cond ((eql piece 'PLUS)
(cond ((listp (nth (1+ elt) expression))
(setf result (append (list (nth (1+ elt) expression))
result)))
(t
(error "GROUP repeat hasn't been implemented yet~%")))))
(cond ((listp piece) (setf result (append (list piece) result)))
((eql piece 'QUESTION) (cond ((listp (nth (1+ elt) expression))
(setf result
(append `((progn (block compare
,(nth (1+ elt)
expression))
t))
result))
(incf elt))
(t
(error "Optional groups not implemented yet~%"))))
((or (eql piece 'ASTRISK) (eql piece 'PLUS))
(cond ((listp (nth (1+ elt) expression))
(setf result
`((let ((oindex index))
(block compare
(do ()
(nil)
,(nth (1+ elt) expression)))
(do ((start index (1- start)))
((< start oindex) nil)
(let ((index start))
(block compare
,@result))))))
(incf elt))
(t
))
)
(t t)))) (if anchored
(setf result
`(lambda (string &key (start 0) (end (length string)))
(setf *regex-groupings* ,group)
(block final-return
(block compare
(let ((index start)
(length end))
,@result)))))
(setf result
`(lambda (string &key (start 0) (end (length string)))
(setf *regex-groupings* ,group)
(block final-return
(let ((length end))
,@fast-first
(do ((marker start (1+ marker)))
((> marker end) nil)
(let ((index marker))
(if (block compare
,@result)
(return t)))))))))))