A Lisp implemented in AWK
# SPDX-License-Identifier: BSD-2-Clause

function _eval3_other_special_forms(form, env, d,      car, a) {
    car = _car(form)
    if(car == _symbol("system"))
        return _system(_eval3(_cadr(form), env, env, d+1))
    else if(car == _symbol("tolower"))
        return _tolower(_eval3(_cadr(form), env, env, d+1))
    else if(car == _symbol("toupper"))
        return _toupper(_eval3(_cadr(form), env, env, d+1))
    else if(car == _symbol("substr"))
        # trusting the user here to provide either two or three args
        if(_is_null(_cdddr(form)))
            return _substr2(_eval3(_cadr(form), env, env, d+1),
                            _eval3(_caddr(form), env, env, d+1))
        else
            return _substr3(_eval3(_cadr(form), env, env, d+1),
                            _eval3(_caddr(form), env, env, d+1),
                            _eval3(_car(_cdddr(form)), env, env, d+1))
    else if(car == _symbol("index"))
        return _index(_eval3(_cadr(form), env, env, d+1),
                      _eval3(_caddr(form), env, env, d+1))
    else if(car == _symbol("match"))
        return _match(_eval3(_cadr(form), env, env, d+1),
                      _eval3(_caddr(form), env, env, d+1))
    else if(car == _symbol("split"))
        # for now you must provide fs
        return _split2(_eval3(_cadr(form), env, env, d+1),
                       _eval3(_caddr(form), env, env, d+1))
    # it would be easy to make non-destructive sub and gsub.
    else if(car == _symbol("sub"))
        # for now you must provide s
        return _sub3(_eval3(_cadr(form), env, env, d+1),
                     
                     _eval3(_caddr(form), env, env, d+1),
                     _eval3(_car(_cdddr(form)), env, env, d+1))
    else if(car == _symbol("gsub"))
        return _gsub3(_eval3(_cadr(form), env, env, d+1),
                      _eval3(_caddr(form), env, env, d+1),
                      _eval3(_car(_cdddr(form)), env, env, d+1))
    else if(car == _symbol("printf"))
        # oo tricky, varargs. note we are sending the cddr in unevaluated.
        return _printf(_eval3(_cadr(form), env, env, d+1),
                       _cddr(form))
    else if(car == _symbol("sprintf"))
        # same
        return _sprintf(_eval3(_cadr(form), env, env, d+1),
                        _cddr(form))
    else if(car == _symbol("string-length"))
        return _string_length(_eval3(_cadr(form), env, env, d+1))
    else if(car == _symbol("strcat"))
        # same, varargs.
        return _strcat(_cdr(form))
    else if(car == _symbol("getline"))
        return _getline()
    else if(car == _symbol("with-ors"))
        return _with_ors(_eval3(_cadr(form), env, env, d+1),
                         _cddr(form), env, d+1) # to be evaluated using evprog
    else if(car == _symbol("with-rs"))
        return _with_rs(_eval3(_cadr(form), env, env, d+1),
                        _cddr(form), env, d+1) # to be evaluated using evprog
    else if(car == _symbol("with-fs"))
        return _with_fs(_eval3(_cadr(form), env, env, d+1),
                        _cddr(form), env, d+1) # to be evaluated using evprog
    else if(car == _symbol("with-output-to"))
        return _with_output_to(_eval3(_cadr(form), env, env, d+1),
                               _eval3(_caddr(form), env, env, d+1),
                               _cdddr(form), env, d+1) # to be evprogged
    else if(car == _symbol("with-input-from"))
        return _with_input_from(_eval3(_cadr(form), env, env, d+1),
                                _eval3(_caddr(form), env, env, d+1),
                                _cdddr(form), env, d+1) # to be evprogged
    else if(car == _symbol("fflush"))
        return _fflush()
    else if(car == _symbol("close"))
        return _close(_eval3(_cadr(form), env, env, d+1))
    else if(car == _symbol("gc-dot"))
        # two filenames: one for marks and one for sweeps
        return _gc_dot(_cons(env,
                             _cons(_GLOBALS,
                                   _cons(_MACROS,
                                         _cons(_COMPARED_SYMBOLS,
                                               _nil())))),
                       _STRING[_cadr(form)],
                       _STRING[_caddr(form)])
    else if(car == _symbol("gc")) 
        return _gc(_cons(env,
                         _cons(_GLOBALS,
                               _cons(_MACROS,
                                     _cons(_COMPARED_SYMBOLS,
                                           _nil())))))
    else if(car == _symbol("dump"))
        # the first argument is a filename
        return _dump(_STRING[_cadr(form)])
    else if(car == _symbol("dump-dot"))
        return _dump_dot(_STRING[_cadr(form)])
    else _builtin_mischaracterization("_eval3_other_special_forms", car)
}

function _system(s) {
    if(_TYPE[s] == "s") {
        return _system(_STRING[s])
    } else {
        logg_err("_system", "non-string operand " _repr(s))
        return _nil()
    }
}

function _tolower(s,     tv) {
    if(_TYPE[s] == "s") {
        return _string(tolower(_STRING[s]))
    } else {
        logg_err("_tolower", "non-string operand " _repr(s))
        return _nil()
    }
}

function _toupper(s,     tv) {
    if(_TYPE[s] == "s") {
        return _string(toupper(_STRING[s]))
    } else {
        logg_err("_toupper", "non-string operand " _repr(s))
        return _nil()
    }
}

function _substr2(s, a,     tv) {
    if(_TYPE[s] == "s") {
        split(a, tv)
        if(tv[1] == "#") {
            return _string(substr(_STRING[s], tv[2]))
        } else {
            logg_err("_substr2", "non-numeric a " _repr(a))
            return _nil()
        }
    } else {
        logg_err("_substr2", "non-string s " _repr(s))
        return _nil()
    }
}

function _substr3(s, a, b,     tv) {
    if(_TYPE[s] == "s") {
        split(a, tv)
        if(tv[1] == "#") {
            a = tv[2]
            split(b, tv)
            if(tv[1] == "#") {
                b = tv[2]
                return _string(substr(_STRING[s], a, b))
            } else {
                logg_err("_substr3", "non-numeric b " _repr(b))
                return _nil()
            }
        } else {
            logg_err("_substr3", "non-numeric a " _repr(a))
            return _nil()
        }
    } else {
        logg_err("_substr3", "non-string s " _repr(s))
        return _nil()
    }
}

function _index(s, t) {
    if(_TYPE[s] == "s") {
        if(_TYPE[t] == "s") {
            return _number(index(_STRING[s], _STRING[t]))
        } else {
            logg_err("_index", "non-string t " _repr(t))
            return _nil()
        }
    } else {
        logg_err("_index", "non-string s " _repr(s))
        return _nil()
    }
}

function _match(s, r) {
    if(_TYPE[s] == "s") {
        if(_TYPE[t] == "s") {
            match(_STRING[s], _STRING[r])
            # if no match, RSTART will be 0 and RLENGTH -1
            return _cons(_number(RSTART),
                         _cons(_number(RLENGTH), _nil()))
        } else {
            logg_err("_match", "non-string r " _repr(r))
            return _nil()
        }
    } else {
        logg_err("_match", "non-string s " _repr(s))
        return _nil()
    }
}

function _split2(s, fs,       a) {
    if(_TYPE[s] == "s") {
        if(_TYPE[fs] == "s") {
            split(_STRING[s], a, _STRING[fs])
            return _awk_array_of_strings_to_list(a)
        } else {
            logg_err("_split", "non-string fs " _repr(fs))
        }
    } else {
        logg_err("_split", "non-string s " _repr(s))
        return _nil()
    }
}

function _string_length(s,      tv) {
    if(_TYPE[s] == "s") {
        return _number(length(_STRING[s]))
    } else {
        logg_err("_string_length", "non-string " _repr(s))
        return _nil()
    }
}

function _strcat(unevald, env, d,      s, here, val) {
    s = ""
    for(here=unevald; !_is_null(here); here=_cdr(here)) {
        val = _eval3(_car(here), env, env, d+1)
        if(_TYPE[val] == "s") {
            s = s _STRING[val]
        } else {
            logg_err("_strcat", "non-string param " _repr(val))
            return _nil()
        }
    }
    return _string(s)
}

function _sprintf(fmt, unevald, env, d,      dlave, evald, s, a, i, p) {
    n = 1
    dlave = _nil()
    # even if there are extra arguments, they should all be evaluated
    for(; !_is_null(unevald); unevald=_cdr(unevald)) {
        dlave = _cons(_eval3(_car(unevald), env, env, d+1), dlave)
    }
    evald = _nreverse(dlave)
    _list_to_flat_awk_array_of_any(evald, a)
    i = 1
    s = ""
    fmt = _STRING[fmt]
    # here we fill in just one format specifier at a time, because awk
    # has no splatting: we can't say a[1] = 5; a[2] = 7;
    # sprintf("%d%d", *a). so there is no way to produce a variadic
    # call to sprintf. to be less simple and perhaps faster, we might
    # count format specifiers (skipping %%'s!) and do special cases
    # for 1 to 5 parameters; but this while loop should cover all
    # cases.
    while(fmt != "") {
#        logg_dbg("_sprintf", "    fmt is " fmt " and s is " s)
        # find a format specification
        if(match(fmt, /%/)) {
            # just copy whatever is before the %
            s = s substr(fmt, 1, RSTART-1)
            fmt = substr(fmt, RSTART)
#            logg_dbg("_sprintf", "now fmt is " fmt " and s is " s)
            if(match(fmt, /^%%/)) {
                # this is just an escaped %. don't eat a parameter.
                s = s "%"
                fmt = substr(fmt, 3)
                continue
            }
            # now the %-thing is at the beginning of fmt. how long is
            # it? (grammar derived from FreeBSD printf(3); your libc
            # may vary)
            match(fmt,/^%[*#+ 0-9.'-]*[diouxXfFeEgGaAcsb]/);
            # RLENGTH is the length of the format specifier.
            if(i > length(a)) {
                logg_err("_sprintf", "not enough values for sprintf!")
                p = _nil()
            } else {
                p = a[i++]
            }
#            logg_dbg("_sprintf", "tiny fmt is " substr(fmt,1,RLENGTH))
            s = s sprintf(substr(fmt,1,RLENGTH), p)
            fmt = substr(fmt, RLENGTH+1)
        } else {
            s = s fmt
            fmt = ""
        }
    }
    return _string(s)
}

function _printf(fmt, unevald, env, d,      dlave, evald, s, a, i, p) {
    # mostly like _sprintf above
    n = 1
    dlave = _nil()
    for(; !_is_null(unevald); unevald=_cdr(unevald)) {
        dlave = _cons(_eval3(_car(unevald), env, env, d+1), dlave)
    }
    evald = _nreverse(dlave)
    _list_to_flat_awk_array_of_any(evald, a)
    i = 1
    s = ""
    fmt = _STRING[fmt]
    while(fmt != "") {
        if(match(fmt, /%/)) {
            # vv printf the bit before the %
            if(_OUTPUT_REDIR_NAME) {
                if(_OUTPUT_REDIR_KIND == ">") {
                    printf substr(fmt, 1, RSTART-1) > _OUTPUT_REDIR_NAME
                    # don't re-overwrite the file with the next bit
                    _OUTPUT_REDIR_KIND = ">>"
                } else if(_OUTPUT_REDIR_KIND == ">>") {
                    printf substr(fmt, 1, RSTART-1) >> _OUTPUT_REDIR_NAME
                } else if(_OUTPUT_REDIR_KIND == "|") {
                    printf substr(fmt, 1, RSTART-1) | _OUTPUT_REDIR_NAME
                }
            } else {
                printf substr(fmt, 1, RSTART-1)
            }
            # ^^
            fmt = substr(fmt, RSTART)
            # now do the %
            if(match(fmt, /^%%/)) {
                # vv printf a percent character
                if(_OUTPUT_REDIR_NAME) {
                    if(_OUTPUT_REDIR_KIND == ">") { # shouldn't be, by now
                        printf "%%" > _OUTPUT_REDIR_NAME
                        _OUTPUT_REDIR_KIND = ">>"
                    } else if(_OUTPUT_REDIR_KIND == ">>") {
                        printf "%%" >> _OUTPUT_REDIR_NAME
                    } else if(_OUTPUT_REDIR_KIND == "|") {
                        printf "%%" | _OUTPUT_REDIR_NAME
                    }
                } else {
                    printf "%%"
                }
                # ^^
                fmt = substr(fmt, 3)
                continue
            }
            # now the %-thing is at the beginning of fmt. how long is
            # it? (grammar derived from FreeBSD printf(3); your libc
            # may vary)
            match(fmt,/^%[*#+ 0-9.'-]*[diouxXfFeEgGaAcsb]/);
            if(i > length(a)) {
                logg_err("_printf", "not enough values for printf!")
                return _nil()
            } else {
                p = a[i++]
            }
            # RLENGTH is the length of the format specifier
            # vv printf %omgwtfbbq, p
            if(_OUTPUT_REDIR_NAME) {
                if(_OUTPUT_REDIR_KIND == ">") {
                    printf substr(fmt,1,RLENGTH), p > _OUTPUT_REDIR_NAME
                    _OUTPUT_REDIR_KIND = ">>"
                } else if(_OUTPUT_REDIR_KIND == ">>") {
                    printf substr(fmt,1,RLENGTH), p >> _OUTPUT_REDIR_NAME
                } else if(_OUTPUT_REDIR_KIND == "|") {
                    printf substr(fmt,1,RLENGTH), p | _OUTPUT_REDIR_NAME
                }
            } else {
                printf substr(fmt,1,RLENGTH), p
            }
            # ^^
            fmt = substr(fmt, RLENGTH+1)
        } else {
            # vv no more %, printf the rest
            if(_OUTPUT_REDIR_NAME) {
                if(_OUTPUT_REDIR_KIND == ">") {
                    printf fmt > _OUTPUT_REDIR_NAME
                    _OUTPUT_REDIR_KIND = ">>"
                } else if(_OUTPUT_REDIR_KIND == ">>") {
                    printf fmt >> _OUTPUT_REDIR_NAME
                } else if(_OUTPUT_REDIR_KIND == "|") {
                    printf fmt | _OUTPUT_REDIR_NAME
                }
            } else {
                printf fmt
            }
            # ^^
            fmt = ""
        }
    }
    return _nil()
}

function _sub3(r, t, s,   new_s, rv) {
    if(_TYPE[r] == "s") {
        if(_TYPE[t] == "s") {
            if(_TYPE[s] == "s") {
                new_s = _STRING[s]
                sub(_STRING[r], _STRING[t], new_s)
                return _string(new_s)
            } else {
                logg_err("_sub3", "non-string s " _repr(s))
                return _nil()
            }
        } else {
            logg_err("_sub3", "non-string t " _repr(t))
            return _nil()
        }
    } else {
        logg_err("_sub3", "non-string r " _repr(r))
        return _nil()
    }
}

function _gsub3(r, t, s,   new_s, rv) {
    if(_TYPE[r] == "s") {
        if(_TYPE[t] == "s") {
            if(_TYPE[s] == "s") {
                # whoa nelly, destructive update.
                new_s = _STRING[s]
                gsub(_STRING[r], _STRING[t], new_s)
                return _string(new_s)
            } else {
                logg_err("_gsub3", "non-string s " _repr(s))
                return _nil()
            }
        } else {
            logg_err("_gsub3", "non-string t " _repr(t))
            return _nil()
        }
    } else {
        logg_err("_gsub3", "non-string r " _repr(r))
        return _nil()
    }
}

function _getline(        a, rv) {
    if(_INPUT_REDIR_NAME) {
        if(_INPUT_REDIR_KIND == "<") {
            rv = getline a < _INPUT_REDIR_NAME
        } else if(_INPUT_REDIR_KIND == "|") {
            rv = _INPUT_REDIR_NAME | getline a
        }
    } else {
        rv = getline a
    }
    return _cons(_string(a), _cons(_number(rv), _nil()))
}

function _with_ors(new_ors, forms, env, d,       old_ors, rv) {
    old_ors = ORS
    if(_TYPE[new_ors] == "s") {
        ORS = _STRING[new_ors]
        rv = _evprog(forms, env, env, d)
        ORS = old_ors
        return rv
    } else {
        logg_err("_with_ors", "with-ors needs a string")
        return _nil()
    }
}

function _with_rs(new_rs, forms, env, d,       old_rs, rv) {
    old_rs = RS
    if(_TYPE[new_rs] == "s") {
        RS = _STRING[new_rs]
        rv = _evprog(forms, env, env, d)
        RS = old_rs
        return rv
    } else {
        logg_err("_with_rs", "with-rs needs a string")
        return _nil()
    }
}

function _with_fs(new_fs, forms, env, d,       old_fs, rv) {
    old_fs = FS
    if(_TYPE[new_fs] == "s") {
        FS = _STRING[new_fs]
        rv = _evprog(forms, env, env, d)
        FS = old_fs
        return rv
    } else {
        logg_err("_with_fs", "with-fs needs a string")
        return _nil()
    }
}



# it is up to every function that produces output under programmatic
# control to check these globals. they start out uninitialized and
# are only set within the body of with-output-to.
function _with_output_to(redir_kind, name, forms, env, d,       old_redir_kind, old_name, rv) {
    old_redir_kind = _OUTPUT_REDIR_KIND
    old_name       = _OUTPUT_NAME
    if(_TYPE[redir_kind] == "s") {
        if(_TYPE[name] == "s") {
            _OUTPUT_REDIR_KIND = _STRING[redir_kind]
            _OUTPUT_REDIR_NAME = _STRING[name]
            rv = _evprog(forms, env, env, d)
            _OUTPUT_REDIR_KIND = old_redir_kind
            _OUTPUT_REDIR_NAME = old_name
            return rv
        } else {
            logg_err("_with_output_to", "file or command should be a string")
            return _nil()
        }
    } else {
        logg_err("_with_output_to", "redir kind should be a string: \">\", \">>\" or \"|\"")
        return _nil()
    }
}


# same as above. every function that consumes input, check these
# globals. they are only set within the body of with-output-to. note:
# (with-input-from "|" "echo foo" (getline)) in glotawk corresponds
# with "echo foo" | getline in awk.
function _with_input_from(redir_kind, name, forms, env, d,       old_redir_kind, old_name, rv) {
    old_redir_kind = _INPUT_REDIR_KIND
    old_name       = _INPUT_NAME
    if(_TYPE[redir_kind] == "s") {
        if(_TYPE[name] == "s") {
            _INPUT_REDIR_KIND = _STRING[redir_kind]
            _INPUT_REDIR_NAME = _STRING[name]
            rv = _evprog(forms, env, env, d)
            _INPUT_REDIR_KIND = old_redir_kind
            _INPUT_REDIR_NAME = old_name
            return rv
        } else {
            logg_err("_with_input_from", "file or command should be a string")
            return _nil()
        }
    } else {
        logg_err("_with_input_from", "redir kind should be a string: \"<\" or \"|\"")
        return _nil()
    }
}

function _fflush() {
    if(_OUTPUT_REDIR_NAME) {
        fflush(_OUTPUT_REDIR_NAME)
    } else {
        fflush("/dev/stdout")
    }
    return _nil()
}

function _close(thing) {
    if(_TYPE[thing] == "s") {
        close(_STRING[thing])
    } else {
        logg_err("_close", "file or command to close should be a string")
    }
    return _nil()
}