from __future__ import absolute_import
import itertools
import lit.util
from lit.ShCommands import Command, GlobItem, Pipeline, Seq
class ShLexer:
def __init__(self, data, win32Escapes = False):
self.data = data
self.pos = 0
self.end = len(data)
self.win32Escapes = win32Escapes
def eat(self):
c = self.data[self.pos]
self.pos += 1
return c
def look(self):
return self.data[self.pos]
def maybe_eat(self, c):
if self.data[self.pos] == c:
self.pos += 1
return True
return False
def lex_arg_fast(self, c):
chunk = self.data[self.pos - 1:].split(None, 1)[0]
if ('|' in chunk or '&' in chunk or
'<' in chunk or '>' in chunk or
"'" in chunk or '"' in chunk or
';' in chunk or '\\' in chunk):
return None
self.pos = self.pos - 1 + len(chunk)
return GlobItem(chunk) if '*' in chunk or '?' in chunk else chunk
def lex_arg_slow(self, c):
if c in "'\"":
str = self.lex_arg_quoted(c)
else:
str = c
unquoted_glob_char = False
quoted_glob_char = False
while self.pos != self.end:
c = self.look()
if c.isspace() or c in "|&;":
break
elif c in '><':
if not str.isdigit():
break
num = int(str)
tok = self.lex_one_token()
assert isinstance(tok, tuple) and len(tok) == 1
return (tok[0], num)
elif c == '"' or c == "'":
self.eat()
quoted_arg = self.lex_arg_quoted(c)
if '*' in quoted_arg or '?' in quoted_arg:
quoted_glob_char = True
str += quoted_arg
elif not self.win32Escapes and c == '\\':
self.eat()
if self.pos == self.end:
lit.util.warning(
"escape at end of quoted argument in: %r" % self.data)
return str
str += self.eat()
elif c in '*?':
unquoted_glob_char = True
str += self.eat()
else:
str += self.eat()
assert not (quoted_glob_char and unquoted_glob_char)
return GlobItem(str) if unquoted_glob_char else str
def lex_arg_quoted(self, delim):
str = ''
while self.pos != self.end:
c = self.eat()
if c == delim:
return str
elif c == '\\' and delim == '"':
if self.pos == self.end:
lit.util.warning(
"escape at end of quoted argument in: %r" % self.data)
return str
c = self.eat()
if c == '"': str += '"'
elif c == '\\':
str += '\\'
else:
str += '\\' + c
else:
str += c
lit.util.warning("missing quote character in %r" % self.data)
return str
def lex_arg_checked(self, c):
pos = self.pos
res = self.lex_arg_fast(c)
end = self.pos
self.pos = pos
reference = self.lex_arg_slow(c)
if res is not None:
if res != reference:
raise ValueError("Fast path failure: %r != %r" % (
res, reference))
if self.pos != end:
raise ValueError("Fast path failure: %r != %r" % (
self.pos, end))
return reference
def lex_arg(self, c):
return self.lex_arg_fast(c) or self.lex_arg_slow(c)
def lex_one_token(self):
c = self.eat()
if c == ';':
return (c,)
if c == '|':
if self.maybe_eat('|'):
return ('||',)
return (c,)
if c == '&':
if self.maybe_eat('&'):
return ('&&',)
if self.maybe_eat('>'):
return ('&>',)
return (c,)
if c == '>':
if self.maybe_eat('&'):
return ('>&',)
if self.maybe_eat('>'):
return ('>>',)
return (c,)
if c == '<':
if self.maybe_eat('&'):
return ('<&',)
if self.maybe_eat('>'):
return ('<<',)
return (c,)
return self.lex_arg(c)
def lex(self):
while self.pos != self.end:
if self.look().isspace():
self.eat()
else:
yield self.lex_one_token()
class ShParser:
def __init__(self, data, win32Escapes = False, pipefail = False):
self.data = data
self.pipefail = pipefail
self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
def lex(self):
for item in self.tokens:
return item
return None
def look(self):
token = self.lex()
if token is not None:
self.tokens = itertools.chain([token], self.tokens)
return token
def parse_command(self):
tok = self.lex()
if not tok:
raise ValueError("empty command!")
if isinstance(tok, tuple):
raise ValueError("syntax error near unexpected token %r" % tok[0])
args = [tok]
redirects = []
while 1:
tok = self.look()
if tok is None:
break
if isinstance(tok, (str, GlobItem)):
args.append(self.lex())
continue
assert isinstance(tok, tuple)
if tok[0] in ('|',';','&','||','&&'):
break
op = self.lex()
arg = self.lex()
if not arg:
raise ValueError("syntax error near token %r" % op[0])
redirects.append((op, arg))
return Command(args, redirects)
def parse_pipeline(self):
negate = False
commands = [self.parse_command()]
while self.look() == ('|',):
self.lex()
commands.append(self.parse_command())
return Pipeline(commands, negate, self.pipefail)
def parse(self):
lhs = self.parse_pipeline()
while self.look():
operator = self.lex()
assert isinstance(operator, tuple) and len(operator) == 1
if not self.look():
raise ValueError(
"missing argument to operator %r" % operator[0])
lhs = Seq(lhs, operator[0], self.parse_pipeline())
return lhs