byo_compiler/pl_comp.py

class LoopBreak(Exception):
    def __init__(self):
        super().__init__('`break` outside a loop')


class LoopContinue(Exception):
    def __init__(self):
        super().__init__('`continue` outside a loop')


class FuncReturn(Exception):
    def __init__(self, val):
        super().__init__('`return` outside a function')
        self.val = val


def parse_expr(s: str, idx: int):
    idx = skip_space(s, idx)
    if s[idx] == '(':
        # a list
        idx += 1
        l = []
        while True:
            idx = skip_space(s, idx)
            if idx >= len(s):
                raise Exception('unbalanced parenthesis')
            if s[idx] == ')':
                idx += 1
                break

            idx, v = parse_expr(s, idx)
            l.append(v)
        return idx, l
    elif s[idx] == ')':
        raise Exception('bad parenthesis')
    else:
        # an atom
        start = idx
        while idx < len(s) and (not s[idx].isspace()) and s[idx] not in '()':
            idx += 1
        if start == idx:
            raise Exception('empty program')
        return idx, parse_atom(s[start:idx])


def skip_space(s: str, idx: int):
    while True:
        save = idx
        while idx < len(s) and s[idx].isspace():
            idx += 1
        if idx < len(s) and s[idx] == ';':
            idx += 1
            while idx < len(s) and s[idx] != '\n':
                idx += 1
        if idx == save:
            break
    return idx


def parse_atom(s):
    # TODO: implement this
    import json
    try:
        return ['val', json.loads(s)]
    except json.JSONDecodeError:
        return s


def pl_parse(s):
    idx, node = parse_expr(s, 0)
    idx = skip_space(s, idx)
    if idx < len(s):
        raise ValueError('trailing garbage')
    return node


def name_lookup(env, key):
    while env:
        current, env = env
        if key in current:
            return current
    raise ValueError('undefined name')


def pl_eval(env, node):
    if not isinstance(node, list):
        assert isinstance(node, str)
        return name_lookup(env, node)[node]

    if len(node) == 0:
        raise ValueError('empty list')

    if len(node) == 2 and node[0] == 'val':
        return node[1]

    if node[0] == 'var' and len(node) == 3:
        _, name, val = node
        scope, _ = env
        if name in scope:
            raise ValueError('duplicated name')
        val = pl_eval(env, val)
        scope[name] = val
        return val

    if node[0] == 'set' and len(node) == 3:
        _, name, val = node
        scope = name_lookup(env, name)
        val = pl_eval(env, val)
        scope[name] = val
        return val

    if node[0] in ('do', 'then', 'else') and len(node) > 1:
        new_env = (dict(), env)
        # TODO: Why is Smith using the same variable name for the array items
        # and their evaluations? Try using a different variable name for the
        # latter.
        for val in node[1:]:
            val = pl_eval(new_env, val)
        return val

    import operator
    binops = {
        '+': operator.add,
        '-': operator.sub,
        '*': operator.mul,
        '/': operator.truediv,
        'eq': operator.eq,
        'ne': operator.ne,
        'ge': operator.ge,
        'gt': operator.gt,
        'le': operator.le,
        'lt': operator.lt,
        'and': operator.and_,
        'or': operator.or_,
    }
    if len(node) == 3 and node[0] in binops:
        op = binops[node[0]]
        return op(pl_eval(env, node[1]), pl_eval(env, node[2]))

    unops = {
        '-': operator.neg,
        'not': operator.not_,
    }
    if len(node) == 2 and node[0] in unops:
        op = unops[node[0]]
        return op(pl_eval(env, node[1]))

    if len(node) in (3, 4) and node[0] in ('?', 'if'):
        _, cond, yes, *no = node
        no = no[0] if no else ['val', None]
        new_env = (dict(), env)
        if pl_eval(new_env, cond):
            return pl_eval(new_env, yes)
        else:
            return pl_eval(new_env, no)

    if node[0] == 'loop' and len(node) == 3:
        _, cond, body = node
        ret = None
        while True:
            new_env = (dict(), env)
            if not pl_eval(new_env, cond):
                break
            try:
                ret = pl_eval(new_env, body)
            except LoopBreak:
                break
            except LoopContinue:
                continue
        return ret

    if node[0] == 'break' and len(node) == 1:
        raise LoopBreak
    if node[0] == 'continue' and len(node) == 1:
        raise LoopContinue

    # function definition
    if node[0] == 'def' and len(node) == 4:
        _, name, args, body = node
        # sanity checks
        for arg_name in args:
            if not isinstance(arg_name, str):
                raise ValueError('bad argument name')
        if len(args) != len(set(args)):
            raise ValueError('duplicated arguments')
        # add the function to the scope
        dct, _ = env
        key = (name, len(args))
        if key in dct:
            raise ValueError('duplicated function')
        dct[key] = (args, body, env)
        return

    # function call
    if node[0] == 'call' and len(node) >= 2:
        _, name, *args = node
        key = (name, len(args))
        fargs, fbody, fenv = name_lookup(env, key)[key]
        # args
        new_env = dict()
        for arg_name, arg_val in zip(fargs, args):
            new_env[arg_name] = pl_eval(env, arg_val)
        # call
        try:
            return pl_eval((new_env, fenv), fbody)
        except FuncReturn as ret:
            return ret.val

    # return
    if node[0] == 'return' and len(node) == 1:
        raise FuncReturn(None)
    if node[0] == 'return' and len(node) == 2:
        _, val = node
        raise FuncReturn(pl_eval(env, val))

    if node[0] == 'print':
        return print(*(pl_eval(env, val) for val in node[1:]))

    raise ValueError('unknown expression')


def pl_parse_prog(s):
    return pl_parse('(do ' + s + ')')


def test_eval():
    def f(s):
        return pl_eval(dict(), pl_parse_prog(s))
    assert f('1') == 1
    assert f('(+ 1 3)') == 4
    assert f('(? (lt 1 3) "yes" "no")') == "yes"
    assert f('(print 1 2 3)') is None
    assert f('''
        ;; first scope
        (var a 1)
        (var b (+ a 1))
        ;; a=1, b=2
        (do
            ;; new scope
            (var a (+ b 5)) ;; name collision
            (set b (+ a 10))
        )
        ;; a=1, b=17
        (* a b)
    ''') == 17
    assert f('''
        (def gauss (n)
             (if (le n 0)
                 (then 0)
                 (else (+ n (call gauss (- n 1))))))
        (call gauss 5)
    ''') == 5 + 4 + 3 + 2 + 1
    assert f('''
        (def gauss (n) (do
             (var r 0)
             (loop (gt n 0) (do
                 (set r (+ r n))
                 (set n (- n 1))
             ))
             (return r)
        ))
        (call gauss 5)
    ''') == 5 + 4 + 3 + 2 + 1


test_eval()  # Run this file as python `/path/to/pl_comp.py`; should work