Compiler projects using llvm
import re

class BooleanExpression:
    # A simple evaluator of boolean expressions.
    #
    # Grammar:
    #   expr         :: or_expr
    #   or_expr      :: and_expr ('||' and_expr)*
    #   and_expr     :: not_expr ('&&' not_expr)*
    #   not_expr     :: '!' not_expr
    #                   '(' or_expr ')'
    #                   match_expr
    #   match_expr   :: braced_regex
    #                   identifier
    #                   braced_regex match_expr
    #                   identifier match_expr
    #   identifier   :: [-+=._a-zA-Z0-9]+
    #   braced_regex :: '{{' python_regex '}}'

    # Evaluates `string` as a boolean expression.
    # Returns True or False. Throws a ValueError on syntax error.
    #
    # Variables in `variables` are true.
    # Regexes that match any variable in `variables` are true.
    # Substrings of `triple` are true.
    # 'true' is true.
    # All other identifiers are false.
    @staticmethod
    def evaluate(string, variables, triple=""):
        try:
            parser = BooleanExpression(string, set(variables), triple)
            return parser.parseAll()
        except ValueError as e:
            raise ValueError(str(e) + ('\nin expression: %r' % string))

    #####

    def __init__(self, string, variables, triple=""):
        self.tokens = BooleanExpression.tokenize(string)
        self.variables = variables
        self.variables.add('true')
        self.triple = triple
        self.value = None
        self.token = None

    # Singleton end-of-expression marker.
    END = object()

    # Tokenization pattern.
    Pattern = re.compile(r'\A\s*([()]|&&|\|\||!|(?:[-+=._a-zA-Z0-9]+|\{\{.+?\}\})+)\s*(.*)\Z')

    @staticmethod
    def tokenize(string):
        while True:
            m = re.match(BooleanExpression.Pattern, string)
            if m is None:
                if string == "":
                    yield BooleanExpression.END;
                    return
                else:
                    raise ValueError("couldn't parse text: %r" % string)

            token = m.group(1)
            string = m.group(2)
            yield token

    def quote(self, token):
        if token is BooleanExpression.END:
            return '<end of expression>'
        else:
            return repr(token)

    def accept(self, t):
        if self.token == t:
            self.token = next(self.tokens)
            return True
        else:
            return False

    def expect(self, t):
        if self.token == t:
            if self.token != BooleanExpression.END:
                self.token = next(self.tokens)
        else:
            raise ValueError("expected: %s\nhave: %s" %
                             (self.quote(t), self.quote(self.token)))

    @staticmethod
    def isMatchExpression(token):
        if (token is BooleanExpression.END or token == '&&' or token == '||' or
            token == '!' or token == '(' or token == ')'):
            return False
        return True

    def parseMATCH(self):
        regex = ''
        for part in filter(None, re.split(r'(\{\{.+?\}\})', self.token)):
            if part.startswith('{{'):
                assert part.endswith('}}')
                regex += '(?:{})'.format(part[2:-2])
            else:
                regex += re.escape(part)
        regex = re.compile(regex)
        self.value = self.token in self.triple or any(regex.fullmatch(var) for var in self.variables)
        self.token = next(self.tokens)

    def parseNOT(self):
        if self.accept('!'):
            self.parseNOT()
            self.value = not self.value
        elif self.accept('('):
            self.parseOR()
            self.expect(')')
        elif not BooleanExpression.isMatchExpression(self.token):
            raise ValueError("expected: '!', '(', '{{', or identifier\nhave: %s" %
                             self.quote(self.token))
        else:
            self.parseMATCH()

    def parseAND(self):
        self.parseNOT()
        while self.accept('&&'):
            left = self.value
            self.parseNOT()
            right = self.value
            # this is technically the wrong associativity, but it
            # doesn't matter for this limited expression grammar
            self.value = left and right

    def parseOR(self):
        self.parseAND()
        while self.accept('||'):
            left = self.value
            self.parseAND()
            right = self.value
            # this is technically the wrong associativity, but it
            # doesn't matter for this limited expression grammar
            self.value = left or right

    def parseAll(self):
        self.token = next(self.tokens)
        self.parseOR()
        self.expect(BooleanExpression.END)
        return self.value


#######
# Tests

import unittest

class TestBooleanExpression(unittest.TestCase):
    def test_variables(self):
        variables = {'its-true', 'false-lol-true', 'under_score',
                     'e=quals', 'd1g1ts'}
        self.assertTrue(BooleanExpression.evaluate('true', variables))
        self.assertTrue(BooleanExpression.evaluate('its-true', variables))
        self.assertTrue(BooleanExpression.evaluate('false-lol-true', variables))
        self.assertTrue(BooleanExpression.evaluate('under_score', variables))
        self.assertTrue(BooleanExpression.evaluate('e=quals', variables))
        self.assertTrue(BooleanExpression.evaluate('d1g1ts', variables))
        self.assertTrue(BooleanExpression.evaluate('{{its.+}}', variables))
        self.assertTrue(BooleanExpression.evaluate('{{false-[lo]+-true}}', variables))
        self.assertTrue(BooleanExpression.evaluate('{{(true|false)-lol-(true|false)}}', variables))
        self.assertTrue(BooleanExpression.evaluate('d1g{{[0-9]}}ts', variables))
        self.assertTrue(BooleanExpression.evaluate('d1g{{[0-9]}}t{{[a-z]}}', variables))
        self.assertTrue(BooleanExpression.evaluate('{{d}}1g{{[0-9]}}t{{[a-z]}}', variables))
        self.assertTrue(BooleanExpression.evaluate('d1{{(g|1)+}}ts', variables))

        self.assertFalse(BooleanExpression.evaluate('false', variables))
        self.assertFalse(BooleanExpression.evaluate('True', variables))
        self.assertFalse(BooleanExpression.evaluate('true-ish', variables))
        self.assertFalse(BooleanExpression.evaluate('not_true', variables))
        self.assertFalse(BooleanExpression.evaluate('tru', variables))
        self.assertFalse(BooleanExpression.evaluate('{{its-true.+}}', variables))

    def test_triple(self):
        triple = 'arch-vendor-os'
        self.assertTrue(BooleanExpression.evaluate('arch-', {}, triple))
        self.assertTrue(BooleanExpression.evaluate('ar', {}, triple))
        self.assertTrue(BooleanExpression.evaluate('ch-vend', {}, triple))
        self.assertTrue(BooleanExpression.evaluate('-vendor-', {}, triple))
        self.assertTrue(BooleanExpression.evaluate('-os', {}, triple))
        self.assertFalse(BooleanExpression.evaluate('arch-os', {}, triple))

        # When matching against the triple, a regex is treated as an identifier and checked
        # for a literal match. This preserves existing behavior before regexes were introduced.
        self.assertFalse(BooleanExpression.evaluate('arch-{{vendor}}-os', {}, triple))
        self.assertTrue(BooleanExpression.evaluate('arch-{{vendor}}-os', {}, 'arch-{{vendor}}-os'))

    def test_matching(self):
        expr1 = 'linux && (target={{aarch64-.+}} || target={{x86_64-.+}})'
        self.assertTrue(BooleanExpression.evaluate(expr1, {'linux', 'target=x86_64-unknown-linux-gnu'}))
        self.assertFalse(BooleanExpression.evaluate(expr1, {'linux', 'target=i386-unknown-linux-gnu'}))

        expr2 = 'use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions'
        self.assertTrue(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.12'}))
        self.assertFalse(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.12', 'no-exceptions'}))
        self.assertFalse(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.15'}))

    def test_operators(self):
        self.assertTrue(BooleanExpression.evaluate('true || true', {}))
        self.assertTrue(BooleanExpression.evaluate('true || false', {}))
        self.assertTrue(BooleanExpression.evaluate('false || true', {}))
        self.assertFalse(BooleanExpression.evaluate('false || false', {}))

        self.assertTrue(BooleanExpression.evaluate('true && true', {}))
        self.assertFalse(BooleanExpression.evaluate('true && false', {}))
        self.assertFalse(BooleanExpression.evaluate('false && true', {}))
        self.assertFalse(BooleanExpression.evaluate('false && false', {}))

        self.assertFalse(BooleanExpression.evaluate('!true', {}))
        self.assertTrue(BooleanExpression.evaluate('!false', {}))

        self.assertTrue(BooleanExpression.evaluate('   ((!((false) ))   ) ', {}))
        self.assertTrue(BooleanExpression.evaluate('true && (true && (true))', {}))
        self.assertTrue(BooleanExpression.evaluate('!false && !false && !! !false', {}))
        self.assertTrue(BooleanExpression.evaluate('false && false || true', {}))
        self.assertTrue(BooleanExpression.evaluate('(false && false) || true', {}))
        self.assertFalse(BooleanExpression.evaluate('false && (false || true)', {}))

    # Evaluate boolean expression `expr`.
    # Fail if it does not throw a ValueError containing the text `error`.
    def checkException(self, expr, error):
        try:
            BooleanExpression.evaluate(expr, {})
            self.fail("expression %r didn't cause an exception" % expr)
        except ValueError as e:
            if -1 == str(e).find(error):
                self.fail(("expression %r caused the wrong ValueError\n" +
                           "actual error was:\n%s\n" +
                           "expected error was:\n%s\n") % (expr, e, error))
        except BaseException as e:
            self.fail(("expression %r caused the wrong exception; actual " +
                      "exception was: \n%r") % (expr, e))

    def test_errors(self):
        self.checkException("ba#d",
                            "couldn't parse text: '#d'\n" +
                            "in expression: 'ba#d'")

        self.checkException("true and true",
                            "expected: <end of expression>\n" +
                            "have: 'and'\n" +
                            "in expression: 'true and true'")

        self.checkException("|| true",
                            "expected: '!', '(', '{{', or identifier\n" +
                            "have: '||'\n" +
                            "in expression: '|| true'")

        self.checkException("true &&",
                            "expected: '!', '(', '{{', or identifier\n" +
                            "have: <end of expression>\n" +
                            "in expression: 'true &&'")

        self.checkException("",
                            "expected: '!', '(', '{{', or identifier\n" +
                            "have: <end of expression>\n" +
                            "in expression: ''")

        self.checkException("*",
                            "couldn't parse text: '*'\n" +
                            "in expression: '*'")

        self.checkException("no wait stop",
                            "expected: <end of expression>\n" +
                            "have: 'wait'\n" +
                            "in expression: 'no wait stop'")

        self.checkException("no-$-please",
                            "couldn't parse text: '$-please'\n" +
                            "in expression: 'no-$-please'")

        self.checkException("(((true && true) || true)",
                            "expected: ')'\n" +
                            "have: <end of expression>\n" +
                            "in expression: '(((true && true) || true)'")

        self.checkException("true (true)",
                            "expected: <end of expression>\n" +
                            "have: '('\n" +
                            "in expression: 'true (true)'")

        self.checkException("( )",
                            "expected: '!', '(', '{{', or identifier\n" +
                            "have: ')'\n" +
                            "in expression: '( )'")

        self.checkException("abc{{def",
                            "couldn't parse text: '{{def'\n" +
                            "in expression: 'abc{{def'")

        self.checkException("{{}}",
                            "couldn't parse text: '{{}}'\n" +
                            "in expression: '{{}}'")


if __name__ == '__main__':
    unittest.main()