import re


class Token:
    """A lexical token produced by scanText().

    A Token compares equal to another Token with the same lexeme type,
    or directly to a bare lexeme-type integer (the text/value fields are
    deliberately ignored by equality).
    """

    # Lexeme types
    NUMBER = 0
    PLUS = 1
    MINUS = 2
    MUL = 3
    DIV = 4
    LPAR = 5
    RPAR = 6
    X = 7
    NAME = 8
    ENDLINE = 9

    # Human-readable names, indexed by lexeme type.
    lexNames = [
        "NUMBER", "PLUS", "MINUS", "MUL", "DIV",
        "LPAR", "RPAR", "X", "NAME", "ENDLINE",
    ]

    def __init__(self, token=NUMBER, text="", value=None):
        self.token = token  # lexeme type: one of the class constants above
        self.text = text    # matched source text ("" when not relevant)
        self.value = value  # numeric value; only set for NUMBER tokens

    def __str__(self):
        """Render as (TYPE), (TYPE, "text") or (TYPE, "text", value)."""
        res = "(" + Token.lexNames[self.token]
        if self.text == "":
            return res + ")"
        res += ", \"" + self.text + "\""
        if self.value is not None:
            res += ", " + str(self.value)
        return res + ")"

    def __repr__(self):
        return "Token" + str(self)

    def __eq__(self, t):
        # Equality is by lexeme type only, so a parser can write
        # e.g. `tok == Token.PLUS` or compare two Tokens directly.
        if isinstance(t, Token):
            return t.token == self.token
        elif isinstance(t, int):
            return t == self.token
        return False

    def __ne__(self, t):
        return not (self == t)

    def __hash__(self):
        # Defining __eq__ would otherwise make Token unhashable in
        # Python 3; hash by lexeme type, consistent with __eq__.
        return hash(self.token)


# Ordered (pattern, lexeme-type) pairs.  Order matters: the first match
# wins, so NUMBER is tried before operators and the X pattern before the
# general NAME pattern.
expressions = [
    (r"[0-9]+(\.[0-9]*)?", Token.NUMBER),
    (r"\+", Token.PLUS),
    (r"\-", Token.MINUS),
    (r"\*", Token.MUL),
    (r"\/", Token.DIV),
    (r"\(", Token.LPAR),
    (r"\)", Token.RPAR),
    (r"x|X", Token.X),
    (r"[a-zA-Z_][a-zA-Z_0-9]*", Token.NAME),
]

# Pre-compiled versions of the patterns above.
regexpression = [(re.compile(e[0]), e[1]) for e in expressions]

whiteSpace = re.compile(r"\s+")


def scanText(txt):
    """Split a text into tokens.

    Returns a list of Token objects, always terminated by an
    ENDLINE token with text "$".

    Raises SyntaxError if the remaining input matches no pattern.
    """
    res = []
    length = len(txt)
    pos = 0
    while pos < length:
        # Skip whitespace in place: match(txt, pos) anchors at pos
        # without slicing the string (the original txt[pos:] slices
        # made scanning quadratic in the input length).
        m = whiteSpace.match(txt, pos)
        if m is not None:
            pos = m.end()
            if pos >= length:
                break
        tokenType = None
        tokenText = None
        tokenValue = None
        # Try each pattern at the current position; first match wins.
        for (p, lexType) in regexpression:
            m = p.match(txt, pos)
            if m is not None:
                tokenType = lexType
                tokenText = m.group()
                if lexType == Token.NUMBER:
                    tokenValue = float(tokenText)
                pos = m.end()
                break
        if tokenType is None:
            raise SyntaxError("Illegal token: " + txt[pos:])
        res.append(Token(tokenType, tokenText, tokenValue))
    res.append(Token(Token.ENDLINE, "$"))
    return res