Compilers and REPL
Posted on March 1, 2020
Tags: typetheory
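A REPL is a Read-Eval-Print Loop. In Python the whole thing fits in four lines: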
def REPL():
    x = input()  # READ
    y = eval(x)  # EVAL
    print(y)     # PRINT
    REPL()       # LOOP
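Python has no tail-call elimination, so the self-call above eventually hits the recursion limit. A minimal while-loop sketch of the same idea (REPL_loop is my name for it, not from the original):
def REPL_loop():
    while True:      # LOOP
        x = input()  # READ
        y = eval(x)  # EVAL
        print(y)     # PRINT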
0.0.1 Tokenizer
Tokenizing is simply the act of converting a string into a list of its meaningful elements (tokens).
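For whitespace- and paren-delimited input, a minimal sketch is just string replacement plus split (the hand-rolled tokenizer in section 1 handles the cases this can't, like negative numbers):
"(+ 2 3)".replace("(", " ( ").replace(")", " ) ").split()
#> ['(', '+', '2', '3', ')']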
0.0.2 A Very Simple LISP math eval for AST
- Racket treats a list like (+ 2 3) as an AST.
- The head of the list is the operator.
- The tail of the list holds the arguments.
def helperStr_Int(k):
    try:
        return int(k)  # numeric string -> int
    except (ValueError, TypeError):
        return k       # anything else passes through unchanged
={"+": lambda x,y: x+y}
str_to_OP
def eval_naive(AST):
= str_to_OP[AST[0]]
operator = list(map(lambda x: helperStr_Int(x),AST[1:]))
args return operator(*args)
= ["+","2","3"]
test
"+","2","3"]) eval_naive([
0.0.3 Eval with nested AST
={"+": lambda x,y: x+y,
str_to_OP"define": lambda x:3}
def helperStr_Int(k):
    try:
        return int(k)
    except (ValueError, TypeError):  # TypeError: nested ASTs are lists, not strings
        return k
def eval_naive(AST):
    operator = str_to_OP[AST[0]]
    args = list(map(lambda x: helperStr_Int(x), AST[1:]))
    # Recursively call eval if another AST is found in the args.
    RECURSEargs = list(map(lambda x: eval_naive(x) if isinstance(x, list) else x, args))
    return operator(*RECURSEargs)

eval_naive(["+", ["+", "1", "2"], "3"])
#> 6
0.0.4 Using Lambdas to define a local environment
In lambda calculus we can think of everything as lambdas, including the local environment.
\((\lambda x.\, sourceCode)(2) \Rightarrow sourceCode[2/x]\)
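For instance, a worked instance of the rule, using the expression from the wrapCode demo below:

\((\lambda x.\, print(x+y))(2) \Rightarrow print(2+y)\)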
x = 2
### sourceCode
if (..):
    doSomething(x)
y = x * ..
.....
### sourceCode
We can't do this naively in Python because Python does eager evaluation.
A way to get around that is converting everything to a string and then using eval,
which simulates lazy evaluation.
#wrapCode builds our lazy lambda expression string
def wrapCode(sourceCode,var,val):
return f"(lambda {var}: {sourceCode})({val})"
"print(x+y)","x",2),"y",3)
wrapCode(wrapCode(#> '(lambda y: (lambda x: print(x+y))(2))(3)'
eval(wrapCode(wrapCode("print(x+y)","x",2),"y",3))
#> 5
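The same AST-as-data idea carries over to typed languages: below is an expression AST in Haskell together with a symbolic differentiator that pattern-matches on it.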
data Expr = Var String
          | Const Int
          | Plus Expr Expr
          | Times Expr Expr
          | Sin Expr
          | Cos Expr
          deriving Show

diff :: Expr -> String -> Expr
diff (Var y) x | y == x    = Const 1
               | otherwise = Const 0
diff (Const _) _ = Const 0
diff (Plus e1 e2) x = Plus (diff e1 x) (diff e2 x)
diff (Times e1 e2) x = Plus (Times (diff e1 x) e2)
                            (Times (diff e2 x) e1)
diff (Sin e1) x = Times (Cos e1) (diff e1 x)
diff (Cos e1) x = Times (Times (Const (-1)) (Sin e1)) (diff e1 x)
1 Tokenizer
Tokenize a math expression,
e.g. "-2 * 4" => ["-2", "*", "4"]
isspace = lambda k: k == ' ' or k == '\t'
def is_number(k):
try:
float(k)
return True
except ValueError:
return False
def token_single(x):
    if len(x) == 0:
        return []
    elif len(x) == 1:
        return [x]
    elif isspace(x[0]):
        return token_single(x[1:])
    elif x[0].isdigit() or x[0] == '-':
        if x[0].isdigit() and isspace(x[1]):
            return [x[0]] + token_multi(x[2:])
        else:
            t = token_multi(x[1:])
            if t == []:
                return [x[0]]
            else:
                if is_number(t[0]):
                    t[0] = x[0] + t[0]  # glue the digit/sign onto the number token
                    return t
                else:
                    return [x[0]] + t
    else:
        if x[0] == '(':
            t = token_single(x[1:])  # after '(' a leading '-' can again be a sign
        else:
            t = token_multi(x[1:])
        return [x[0]] + t
def token_multi(x):
    if len(x) == 0:
        return []
    elif len(x) == 1:
        return [x]
    elif isspace(x[0]):
        return token_multi(x[1:])
    elif x[0].isdigit():
        if isspace(x[1]):
            return [x[0]] + token_multi(x[2:])
        else:
            t = token_multi(x[1:])
            if t == []:
                return [x[0]]
            else:
                if is_number(t[0]):
                    t[0] = x[0] + t[0]
                    return t
                else:
                    return [x[0]] + t
    else:
        if x[0] == '(':
            t = token_single(x[1:])
        else:
            t = token_multi(x[1:])
        return [x[0]] + t
token_multi('-2')
#> ['-', '2']
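Mid-expression, token_multi treats '-' as an operator and splits it off; token_single is presumably the intended entry point, since it treats a leading '-' as a sign. Tracing it on the motivating example (output computed from the code above):
token_single('-2 * 4')
#> ['-2', '*', '4']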
2 Compiler
2.1 Terminology
- Lexer - reads an input stream of characters and converts them to tokens