;****************************************** -*- lexical-binding: t; -*- ***
;* *
;* OCaml *
;* *
;* Jacques Garrigue, Ian T Zimmerman, Damien Doligez *
;* *
;* Copyright 1997 Institut National de Recherche en Informatique et *
;* en Automatique. *
;* *
;* All rights reserved. This file is distributed under the terms of *
;* the GNU General Public License. *
;* *
;**************************************************************************
;; caml-font: font-lock support for OCaml files
;; now with perfect parsing of comments and strings
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; In order to correctly fontify an OCaml buffer, it is necessary to
; lex the buffer to tell what is a comment and what is a string.
; We do this incrementally in a hook
; (font-lock-extend-after-change-region-function), which is called
; whenever the buffer changes. It sets the syntax-table property
; on each beginning and end of chars, strings, and comments.
; This mode correctly handles all the strange cases in the following
; OCaml code.
;
; let l' _ = ();;
; let _' _ = ();;
; let l' = ();;
; let b2_' = ();;
; let a'a' = ();;
; let f2 _ _ = ();;
; let f3 _ _ _ = ();;
; let f' _ _ _ _ _ = ();;
; let hello = ();;
;
; (* ==== easy stuff ==== *)
;
; (* a comment *)
; (* "a string" in a comment *)
; (* "another string *)" in a comment *)
; (* not a string '"' in a comment *)
; "a string";;
; '"';; (* not a string *)
;
; (* ==== hard stuff ==== *)
;
; l'"' not not a string ";;
; _'"' also not not a string";;
; f2 0l'"';; (* not not not a string *)
; f2 0_'"';; (* also not not not a string *)
; f3 0.0l'"' not not not not a string ";;
; f3 0.0_'"';; (* not not not not not a string *)
; f2 0b01_'"';; (* not not not a string *)
; f3 0b2_'"' not not not not a string ";;
; f3 0b02_'"';; (* not not not not not a string *)
; '\'';; (* a char *)
; '
; ';; (* a char *)
; '^M
; ';; (* also a char [replace ^M with one CR character] *)
; a'a';; (* not a char *)
; type '
; a' t = X;; (* also not a char *)
;
; (* ==== far-out stuff ==== *)
;
; f'"'" "*) print_endline "hello";;(* \"" ;;
; (* f'"'" "*) print_endline "hello";;(* \"" ;; *)
; decimal integers are folded into the RE for floats to get longest-match
; without using posix-looking-at
; match any ident or numeral token
; match any char token
; match a quote followed by a newline
; match an opening delimiter for a quoted string
; match any token or sequence of tokens that cannot contain a
; quote, double quote, a start of comment or quoted string, or a newline
; note: this is only to go faster than one character at a time
; match any sequence of non-special characters in a comment
; note: this is only to go faster than one character at a time
; match any sequence of non-special characters in a string
; note: this is only to go faster than one character at a time
; match a newline
; Put the 'caml-font-state property with the given state on the
; character before pos. Return nil if it was already there, t if not.
; Same as looking-at, but erase properties 'caml-font-state and
; 'syntax-table from the matched range
; Annotate the buffer starting at point in state (st . depth)
; Set the 'syntax-table property on beginnings and ends of:
; - strings
; - chars
; - comments
; Also set the 'caml-font-state property on each LF character that is
; not preceded by a single quote. The property gives the state of the
; lexer, as a pair (st . depth), after reading that character.
; Leave point at a position where the pre-existing 'caml-font-state
; property is consistent with the new parse, or at the end of the buffer.
; depth is the depth of nested comments at this point
; it must be a non-negative integer
; st can be:
; nil -- we are in the base state
; t -- we are within a string
; a string -- we are within a quoted string and st is the closing delimiter
; This is the hook function for font-lock-extend-after-change-region-function
; It finds the nearest saved state at the left of the changed text,
; calls caml-font-annotate to set the 'caml-font-state and 'syntax-table
; properties, then returns the range that was parsed by caml-font-annotate.
; We don't use the normal caml-mode syntax table because it contains an
; approximation of strings and comments that interferes with our
; annotations.