(require 'cl-lib)
(require 'xml)
(require 'pcase)
(defun string-trim-whitespace (string)
"A simple function, strips the whitespace from beginning and
end of the string. Leaves all other whitespace untouched."
(replace-regexp-in-string
(rx string-start (* whitespace)
(group (+? anything))
(* whitespace) string-end)
"\\1"
string))
(defun esxml-trim-ws (esxml)
"This may cause problems, is intended for parsing xml into sxml
but may eroneously delete desirable white space."
(if (stringp esxml) (string-trim-whitespace esxml)
(pcase-let ((`(,tag ,attrs . ,body) esxml))
`(,tag ,attrs
,@(mapcar 'esxml-trim-ws body)))))
(defun attrp (attr)
"Returns t if attr is a an esxml attribute.
An esxml attribute is a cons of the form (symbol . string)"
(and (consp attr)
(symbolp (car attr))
(stringp (cdr attr))))
(defun esxml--convert-pair (attr)
"Converts from cons cell to attribute pair. Not intended for
general use."
(pcase-let ((`(,car . ,cdr) attr))
(cl-check-type cdr string)
(concat (symbol-name car)
"="
(prin1-to-string (xml-escape-string cdr)))))
(defun attrsp (attrs)
"Returns t if attrs is a list of esxml attributes.
See: `attrp'"
(and (listp attrs)
(cl-every (lambda (attr)
(and (consp attr)
(symbolp (car attr))
(stringp (cdr attr))))
attrs)))
(defun esxml-validate-form (esxml)
"A fast esxml validator. Will error on invalid subparts making
it suitable for hindsight testing."
(cond ((stringp esxml) nil)
((< (length esxml) 2)
(error "%s is too short to be a valid esxml expression" esxml))
(t (pcase-let ((`(,tag ,attrs . ,body) esxml))
(cl-check-type tag symbol)
(cl-check-type attrs attrs)
(mapcar 'esxml-validate-form body)))))
(defun esxml--to-xml-recursive (esxml)
(pcase esxml
((pred stringp)
(xml-escape-string esxml))
(`(raw-string ,string)
(cl-check-type string stringp)
string)
(`(comment nil ,body)
(concat "<!-- " body " -->"))
(`(,tag ,attrs . ,body)
(concat "<" (symbol-name tag)
(when attrs
(concat " " (mapconcat 'esxml--convert-pair attrs " ")))
(if body
(concat ">" (mapconcat 'esxml--to-xml-recursive body "")
"</" (symbol-name tag) ">")
"/>")))))
(defun esxml-to-xml (esxml)
"This translates an esxml expression, i.e. that which is returned
by xml-parse-region. The structure is defined as any of the
following forms:
- A string.
STRING
STRING: the string it is returned with entities escaped
- A list where the first element is the raw-string symbol and the
second is a string.
(raw-string STRING)
STRING: the string is returned unchanged. This allows for caching
of any constant parts, such as headers and footers.
- A list where the first element is the comment symbol and the
second is a string.
(comment STRING)
STRING: the string is embedded in a HTML comment.
- A list where the first element is the tag, the second is
an alist of attribute value pairs and the remainder of the list
is 0 or more esxml elements.
(TAG ATTRS &rest BODY)
TAG: is the tag and must be a symbol.
ATTRS: is an alist of attribute pairs each pair must be of the
form (KEY . VALUE).
KEY: is the name of the attribute and must be a symbol.
VALUE: is the value of the attribute and must be a string.
BODY: is zero or more esxml expressions. Having no body forms
implies that the tag should be self closed. If there is
one or more body forms the tag will always be explicitly
closed, even if they are the empty string."
(condition-case nil
(esxml--to-xml-recursive esxml)
(error (esxml-validate-form esxml))))
(defun pp-esxml-to-xml (esxml)
"This translates an esxml expresion as `esxml-to-xml' but
indents it for ease of human readability, it is necessarily
slower and will produce longer output."
(pcase esxml
((pred stringp)
(xml-escape-string esxml))
(`(raw-string ,string)
(cl-check-type string stringp)
string)
(`(comment nil ,body)
(concat "<!-- " body " -->"))
(`(,tag ,attrs . ,body)
(cl-check-type tag symbol)
(cl-check-type attrs attrs)
(concat "<" (symbol-name tag)
(when attrs
(concat " " (mapconcat 'esxml--convert-pair attrs " ")))
(if body
(concat ">" (if (cl-every 'stringp body)
(mapconcat 'identity body " ")
(concat "\n"
(replace-regexp-in-string
"^" " "
(mapconcat 'pp-esxml-to-xml body "\n"))
"\n"))
"</" (symbol-name tag) ">")
"/>")))
(_
(error "%s is not a valid esxml expression" esxml))))
(defun sxml-to-esxml (sxml)
"Translates sxml to esxml so the common standard can be used.
See: http://okmij.org/ftp/Scheme/SXML.html. Additionally,
(*RAW-STRING* \"string\") is translated to (raw-string
\"string\")."
(pcase sxml
(`(*RAW-STRING* ,body)
`(raw-string ,body))
(`(*COMMENT* ,body)
`(comment nil ,body))
(`(,tag (@ . ,attrs) . ,body)
`(,tag ,(mapcar (lambda (attr)
(cons (car attr)
(or (cadr attr)
(prin1-to-string (car attr)))))
attrs)
,@(mapcar 'sxml-to-esxml body)))
(`(,tag . ,body)
`(,tag nil
,@(mapcar 'sxml-to-esxml body)))
((and sxml (pred stringp)) sxml)))
(defun sxml-to-xml (sxml)
"Translates sxml to xml, via esxml, hey it's only a constant
factor. :)"
(esxml-to-xml (sxml-to-esxml sxml)))
(defun xml-to-esxml (string &optional trim)
(with-temp-buffer
(insert string)
(let ((parse-tree (libxml-parse-xml-region (point-min)
(point-max))))
(if trim
(esxml-trim-ws parse-tree)
parse-tree))))
(defun esxml-get-by-key (esxml key value)
"Returns a list of all elements whose wttribute KEY match
VALUE. KEY should be a symbol, and VALUE should be a string.
Will not recurse below a match."
(unless (stringp esxml)
(pcase-let ((`(,tag ,attrs . ,body) esxml))
(if (equal value
(assoc-default key attrs))
(list esxml)
(apply 'append (mapcar (lambda (sexp)
(esxml-get-by-key sexp key value))
body))))))
(defun esxml-get-tags (esxml tags)
"Returns a list of all elements whose tag is a member of TAGS.
TAGS should be a list of tags to be matched against. Will not
recurse below a match."
(unless (stringp esxml)
(pcase-let ((`(,tag ,attrs . ,body) esxml))
(if (member tag tags)
(list esxml)
(apply 'append (mapcar (lambda (sexp)
(esxml-get-tags sexp tags))
body))))))
(defun esxml-get-forms (esxml)
"Returns a list of all forms."
(esxml-get-tags esxml '(form)))
(defmacro esxml-destructuring-mapcar (args sexp seq)
(declare (indent 2))
(let ((entry (make-symbol "entry")))
`(mapcar (lambda (,entry)
(cl-destructuring-bind ,args ,entry ,sexp))
,seq)))
(provide 'esxml)