# SPDX-License-Identifier: BSD-2-Clause function _eval3_other_special_forms(form, env, d, car, a) { car = _car(form) if(car == _symbol("system")) return _system(_eval3(_cadr(form), env, env, d+1)) else if(car == _symbol("tolower")) return _tolower(_eval3(_cadr(form), env, env, d+1)) else if(car == _symbol("toupper")) return _toupper(_eval3(_cadr(form), env, env, d+1)) else if(car == _symbol("substr")) # trusting the user here to provide either two or three args if(_is_null(_cdddr(form))) return _substr2(_eval3(_cadr(form), env, env, d+1), _eval3(_caddr(form), env, env, d+1)) else return _substr3(_eval3(_cadr(form), env, env, d+1), _eval3(_caddr(form), env, env, d+1), _eval3(_car(_cdddr(form)), env, env, d+1)) else if(car == _symbol("index")) return _index(_eval3(_cadr(form), env, env, d+1), _eval3(_caddr(form), env, env, d+1)) else if(car == _symbol("match")) return _match(_eval3(_cadr(form), env, env, d+1), _eval3(_caddr(form), env, env, d+1)) else if(car == _symbol("split")) # for now you must provide fs return _split2(_eval3(_cadr(form), env, env, d+1), _eval3(_caddr(form), env, env, d+1)) # it would be easy to make non-destructive sub and gsub. else if(car == _symbol("sub")) # for now you must provide s return _sub3(_eval3(_cadr(form), env, env, d+1), _eval3(_caddr(form), env, env, d+1), _eval3(_car(_cdddr(form)), env, env, d+1)) else if(car == _symbol("gsub")) return _gsub3(_eval3(_cadr(form), env, env, d+1), _eval3(_caddr(form), env, env, d+1), _eval3(_car(_cdddr(form)), env, env, d+1)) else if(car == _symbol("printf")) # oo tricky, varargs. note we are sending the cddr in unevaluated. return _printf(_eval3(_cadr(form), env, env, d+1), _cddr(form)) else if(car == _symbol("sprintf")) # same return _sprintf(_eval3(_cadr(form), env, env, d+1), _cddr(form)) else if(car == _symbol("string-length")) return _string_length(_eval3(_cadr(form), env, env, d+1)) else if(car == _symbol("strcat")) # same, varargs. return _strcat(_cdr(form)) else if(car == _symbol("getline")) return _getline() else if(car == _symbol("with-ors")) return _with_ors(_eval3(_cadr(form), env, env, d+1), _cddr(form), env, d+1) # to be evaluated using evprog else if(car == _symbol("with-rs")) return _with_rs(_eval3(_cadr(form), env, env, d+1), _cddr(form), env, d+1) # to be evaluated using evprog else if(car == _symbol("with-fs")) return _with_fs(_eval3(_cadr(form), env, env, d+1), _cddr(form), env, d+1) # to be evaluated using evprog else if(car == _symbol("with-output-to")) return _with_output_to(_eval3(_cadr(form), env, env, d+1), _eval3(_caddr(form), env, env, d+1), _cdddr(form), env, d+1) # to be evprogged else if(car == _symbol("with-input-from")) return _with_input_from(_eval3(_cadr(form), env, env, d+1), _eval3(_caddr(form), env, env, d+1), _cdddr(form), env, d+1) # to be evprogged else if(car == _symbol("fflush")) return _fflush() else if(car == _symbol("close")) return _close(_eval3(_cadr(form), env, env, d+1)) else if(car == _symbol("gc-dot")) # two filenames: one for marks and one for sweeps return _gc_dot(_cons(env, _cons(_GLOBALS, _cons(_MACROS, _cons(_COMPARED_SYMBOLS, _nil())))), _STRING[_cadr(form)], _STRING[_caddr(form)]) else if(car == _symbol("gc")) return _gc(_cons(env, _cons(_GLOBALS, _cons(_MACROS, _cons(_COMPARED_SYMBOLS, _nil()))))) else if(car == _symbol("dump")) # the first argument is a filename return _dump(_STRING[_cadr(form)]) else if(car == _symbol("dump-dot")) return _dump_dot(_STRING[_cadr(form)]) else _builtin_mischaracterization("_eval3_other_special_forms", car) } function _system(s) { if(_TYPE[s] == "s") { return _system(_STRING[s]) } else { logg_err("_system", "non-string operand " _repr(s)) return _nil() } } function _tolower(s, tv) { if(_TYPE[s] == "s") { return _string(tolower(_STRING[s])) } else { logg_err("_tolower", "non-string operand " _repr(s)) return _nil() } } function _toupper(s, tv) { if(_TYPE[s] == "s") { return _string(toupper(_STRING[s])) } else { logg_err("_toupper", "non-string operand " _repr(s)) return _nil() } } function _substr2(s, a, tv) { if(_TYPE[s] == "s") { split(a, tv) if(tv[1] == "#") { return _string(substr(_STRING[s], tv[2])) } else { logg_err("_substr2", "non-numeric a " _repr(a)) return _nil() } } else { logg_err("_substr2", "non-string s " _repr(s)) return _nil() } } function _substr3(s, a, b, tv) { if(_TYPE[s] == "s") { split(a, tv) if(tv[1] == "#") { a = tv[2] split(b, tv) if(tv[1] == "#") { b = tv[2] return _string(substr(_STRING[s], a, b)) } else { logg_err("_substr3", "non-numeric b " _repr(b)) return _nil() } } else { logg_err("_substr3", "non-numeric a " _repr(a)) return _nil() } } else { logg_err("_substr3", "non-string s " _repr(s)) return _nil() } } function _index(s, t) { if(_TYPE[s] == "s") { if(_TYPE[t] == "s") { return _number(index(_STRING[s], _STRING[t])) } else { logg_err("_index", "non-string t " _repr(t)) return _nil() } } else { logg_err("_index", "non-string s " _repr(s)) return _nil() } } function _match(s, r) { if(_TYPE[s] == "s") { if(_TYPE[t] == "s") { match(_STRING[s], _STRING[r]) # if no match, RSTART will be 0 and RLENGTH -1 return _cons(_number(RSTART), _cons(_number(RLENGTH), _nil())) } else { logg_err("_match", "non-string r " _repr(r)) return _nil() } } else { logg_err("_match", "non-string s " _repr(s)) return _nil() } } function _split2(s, fs, a) { if(_TYPE[s] == "s") { if(_TYPE[fs] == "s") { split(_STRING[s], a, _STRING[fs]) return _awk_array_of_strings_to_list(a) } else { logg_err("_split", "non-string fs " _repr(fs)) } } else { logg_err("_split", "non-string s " _repr(s)) return _nil() } } function _string_length(s, tv) { if(_TYPE[s] == "s") { return _number(length(_STRING[s])) } else { logg_err("_string_length", "non-string " _repr(s)) return _nil() } } function _strcat(unevald, env, d, s, here, val) { s = "" for(here=unevald; !_is_null(here); here=_cdr(here)) { val = _eval3(_car(here), env, env, d+1) if(_TYPE[val] == "s") { s = s _STRING[val] } else { logg_err("_strcat", "non-string param " _repr(val)) return _nil() } } return _string(s) } function _sprintf(fmt, unevald, env, d, dlave, evald, s, a, i, p) { n = 1 dlave = _nil() # even if there are extra arguments, they should all be evaluated for(; !_is_null(unevald); unevald=_cdr(unevald)) { dlave = _cons(_eval3(_car(unevald), env, env, d+1), dlave) } evald = _nreverse(dlave) _list_to_flat_awk_array_of_any(evald, a) i = 1 s = "" fmt = _STRING[fmt] # here we fill in just one format specifier at a time, because awk # has no splatting: we can't say a[1] = 5; a[2] = 7; # sprintf("%d%d", *a). so there is no way to produce a variadic # call to sprintf. to be less simple and perhaps faster, we might # count format specifiers (skipping %%'s!) and do special cases # for 1 to 5 parameters; but this while loop should cover all # cases. while(fmt != "") { # logg_dbg("_sprintf", " fmt is " fmt " and s is " s) # find a format specification if(match(fmt, /%/)) { # just copy whatever is before the % s = s substr(fmt, 1, RSTART-1) fmt = substr(fmt, RSTART) # logg_dbg("_sprintf", "now fmt is " fmt " and s is " s) if(match(fmt, /^%%/)) { # this is just an escaped %. don't eat a parameter. s = s "%" fmt = substr(fmt, 3) continue } # now the %-thing is at the beginning of fmt. how long is # it? (grammar derived from FreeBSD printf(3); your libc # may vary) match(fmt,/^%[*#+ 0-9.'-]*[diouxXfFeEgGaAcsb]/); # RLENGTH is the length of the format specifier. if(i > length(a)) { logg_err("_sprintf", "not enough values for sprintf!") p = _nil() } else { p = a[i++] } # logg_dbg("_sprintf", "tiny fmt is " substr(fmt,1,RLENGTH)) s = s sprintf(substr(fmt,1,RLENGTH), p) fmt = substr(fmt, RLENGTH+1) } else { s = s fmt fmt = "" } } return _string(s) } function _printf(fmt, unevald, env, d, dlave, evald, s, a, i, p) { # mostly like _sprintf above n = 1 dlave = _nil() for(; !_is_null(unevald); unevald=_cdr(unevald)) { dlave = _cons(_eval3(_car(unevald), env, env, d+1), dlave) } evald = _nreverse(dlave) _list_to_flat_awk_array_of_any(evald, a) i = 1 s = "" fmt = _STRING[fmt] while(fmt != "") { if(match(fmt, /%/)) { # vv printf the bit before the % if(_OUTPUT_REDIR_NAME) { if(_OUTPUT_REDIR_KIND == ">") { printf substr(fmt, 1, RSTART-1) > _OUTPUT_REDIR_NAME # don't re-overwrite the file with the next bit _OUTPUT_REDIR_KIND = ">>" } else if(_OUTPUT_REDIR_KIND == ">>") { printf substr(fmt, 1, RSTART-1) >> _OUTPUT_REDIR_NAME } else if(_OUTPUT_REDIR_KIND == "|") { printf substr(fmt, 1, RSTART-1) | _OUTPUT_REDIR_NAME } } else { printf substr(fmt, 1, RSTART-1) } # ^^ fmt = substr(fmt, RSTART) # now do the % if(match(fmt, /^%%/)) { # vv printf a percent character if(_OUTPUT_REDIR_NAME) { if(_OUTPUT_REDIR_KIND == ">") { # shouldn't be, by now printf "%%" > _OUTPUT_REDIR_NAME _OUTPUT_REDIR_KIND = ">>" } else if(_OUTPUT_REDIR_KIND == ">>") { printf "%%" >> _OUTPUT_REDIR_NAME } else if(_OUTPUT_REDIR_KIND == "|") { printf "%%" | _OUTPUT_REDIR_NAME } } else { printf "%%" } # ^^ fmt = substr(fmt, 3) continue } # now the %-thing is at the beginning of fmt. how long is # it? (grammar derived from FreeBSD printf(3); your libc # may vary) match(fmt,/^%[*#+ 0-9.'-]*[diouxXfFeEgGaAcsb]/); if(i > length(a)) { logg_err("_printf", "not enough values for printf!") return _nil() } else { p = a[i++] } # RLENGTH is the length of the format specifier # vv printf %omgwtfbbq, p if(_OUTPUT_REDIR_NAME) { if(_OUTPUT_REDIR_KIND == ">") { printf substr(fmt,1,RLENGTH), p > _OUTPUT_REDIR_NAME _OUTPUT_REDIR_KIND = ">>" } else if(_OUTPUT_REDIR_KIND == ">>") { printf substr(fmt,1,RLENGTH), p >> _OUTPUT_REDIR_NAME } else if(_OUTPUT_REDIR_KIND == "|") { printf substr(fmt,1,RLENGTH), p | _OUTPUT_REDIR_NAME } } else { printf substr(fmt,1,RLENGTH), p } # ^^ fmt = substr(fmt, RLENGTH+1) } else { # vv no more %, printf the rest if(_OUTPUT_REDIR_NAME) { if(_OUTPUT_REDIR_KIND == ">") { printf fmt > _OUTPUT_REDIR_NAME _OUTPUT_REDIR_KIND = ">>" } else if(_OUTPUT_REDIR_KIND == ">>") { printf fmt >> _OUTPUT_REDIR_NAME } else if(_OUTPUT_REDIR_KIND == "|") { printf fmt | _OUTPUT_REDIR_NAME } } else { printf fmt } # ^^ fmt = "" } } return _nil() } function _sub3(r, t, s, new_s, rv) { if(_TYPE[r] == "s") { if(_TYPE[t] == "s") { if(_TYPE[s] == "s") { new_s = _STRING[s] sub(_STRING[r], _STRING[t], new_s) return _string(new_s) } else { logg_err("_sub3", "non-string s " _repr(s)) return _nil() } } else { logg_err("_sub3", "non-string t " _repr(t)) return _nil() } } else { logg_err("_sub3", "non-string r " _repr(r)) return _nil() } } function _gsub3(r, t, s, new_s, rv) { if(_TYPE[r] == "s") { if(_TYPE[t] == "s") { if(_TYPE[s] == "s") { # whoa nelly, destructive update. new_s = _STRING[s] gsub(_STRING[r], _STRING[t], new_s) return _string(new_s) } else { logg_err("_gsub3", "non-string s " _repr(s)) return _nil() } } else { logg_err("_gsub3", "non-string t " _repr(t)) return _nil() } } else { logg_err("_gsub3", "non-string r " _repr(r)) return _nil() } } function _getline( a, rv) { if(_INPUT_REDIR_NAME) { if(_INPUT_REDIR_KIND == "<") { rv = getline a < _INPUT_REDIR_NAME } else if(_INPUT_REDIR_KIND == "|") { rv = _INPUT_REDIR_NAME | getline a } } else { rv = getline a } return _cons(_string(a), _cons(_number(rv), _nil())) } function _with_ors(new_ors, forms, env, d, old_ors, rv) { old_ors = ORS if(_TYPE[new_ors] == "s") { ORS = _STRING[new_ors] rv = _evprog(forms, env, env, d) ORS = old_ors return rv } else { logg_err("_with_ors", "with-ors needs a string") return _nil() } } function _with_rs(new_rs, forms, env, d, old_rs, rv) { old_rs = RS if(_TYPE[new_rs] == "s") { RS = _STRING[new_rs] rv = _evprog(forms, env, env, d) RS = old_rs return rv } else { logg_err("_with_rs", "with-rs needs a string") return _nil() } } function _with_fs(new_fs, forms, env, d, old_fs, rv) { old_fs = FS if(_TYPE[new_fs] == "s") { FS = _STRING[new_fs] rv = _evprog(forms, env, env, d) FS = old_fs return rv } else { logg_err("_with_fs", "with-fs needs a string") return _nil() } } # it is up to every function that produces output under programmatic # control to check these globals. they start out uninitialized and # are only set within the body of with-output-to. function _with_output_to(redir_kind, name, forms, env, d, old_redir_kind, old_name, rv) { old_redir_kind = _OUTPUT_REDIR_KIND old_name = _OUTPUT_NAME if(_TYPE[redir_kind] == "s") { if(_TYPE[name] == "s") { _OUTPUT_REDIR_KIND = _STRING[redir_kind] _OUTPUT_REDIR_NAME = _STRING[name] rv = _evprog(forms, env, env, d) _OUTPUT_REDIR_KIND = old_redir_kind _OUTPUT_REDIR_NAME = old_name return rv } else { logg_err("_with_output_to", "file or command should be a string") return _nil() } } else { logg_err("_with_output_to", "redir kind should be a string: \">\", \">>\" or \"|\"") return _nil() } } # same as above. every function that consumes input, check these # globals. they are only set within the body of with-output-to. note: # (with-input-from "|" "echo foo" (getline)) in glotawk corresponds # with "echo foo" | getline in awk. function _with_input_from(redir_kind, name, forms, env, d, old_redir_kind, old_name, rv) { old_redir_kind = _INPUT_REDIR_KIND old_name = _INPUT_NAME if(_TYPE[redir_kind] == "s") { if(_TYPE[name] == "s") { _INPUT_REDIR_KIND = _STRING[redir_kind] _INPUT_REDIR_NAME = _STRING[name] rv = _evprog(forms, env, env, d) _INPUT_REDIR_KIND = old_redir_kind _INPUT_REDIR_NAME = old_name return rv } else { logg_err("_with_input_from", "file or command should be a string") return _nil() } } else { logg_err("_with_input_from", "redir kind should be a string: \"<\" or \"|\"") return _nil() } } function _fflush() { if(_OUTPUT_REDIR_NAME) { fflush(_OUTPUT_REDIR_NAME) } else { fflush("/dev/stdout") } return _nil() } function _close(thing) { if(_TYPE[thing] == "s") { close(_STRING[thing]) } else { logg_err("_close", "file or command to close should be a string") } return _nil() }