import exceptions.UnknownType

/**
 * Hand-written lexer that splits source text into `(lexeme, TokenType)` pairs.
 *
 * The input is scanned one character at a time. Characters are accumulated in a
 * buffer and the buffer is classified after every character using a single
 * character of lookahead; as long as the classification is [TokenType.Skip]
 * the buffer keeps growing, otherwise it is emitted as a token and cleared.
 *
 * Known limitations (behaviour of the current rules, not changed here):
 * - Inside a string literal a space is neither emitted nor rejected, so the
 *   buffer then starts with a space and no rule matches it any more; string
 *   contents after the first space (and the closing quote) are not emitted.
 * - Whatever is left in the buffer when the input ends is dropped silently.
 * - Only the space character is treated as whitespace.
 */
class Lexical {

    private val keyword = setOf("print") // TODO: DataType matching
    private val assignment = setOf("=", "+=", "-=", "*=", "/=")
    private val arithmetic = setOf("+", "-", "*", "/", "%")
    private val comparison = setOf("==", "!=", "<", "<=", ">", ">=")
    private val logical = setOf("&&", "||", "!")
    private val punctuation = setOf(",", ":", ".")
    private val brackets = setOf("(", ")", "[", "]", "{", "}") // TODO: Use brackets for functions
    private val classifier = setOf("\"") // TODO: Add char mode e.g 'a'

    // Compiled once instead of on every getTokenType call.
    private val letters = Regex("[a-zA-Z]*")
    private val digits = Regex("[0-9]*")

    /**
     * Tokenizes [source].
     *
     * @param source the text to tokenize
     * @return the tokens in source order, each as a pair of the lexeme and its
     *   [TokenType]; [TokenType.Skip] is never part of the result
     * @throws UnknownType when, outside of a string literal, the buffer holds
     *   more than one character, contains a space and matches no rule
     */
    fun analyze(source: String): MutableList<Pair<String, TokenType>> {
        var buffer = ""
        var stringMode = false
        val tokens = mutableListOf<Pair<String, TokenType>>()

        for (i in source.indices) {
            buffer += source[i]
            // A space stands in for the lookahead character at the end of the input.
            val next = if (i + 1 < source.length) source[i + 1] else ' '
            val tokenType = getTokenType(buffer, next, stringMode)

            if (tokenType != TokenType.Skip) {
                tokens += buffer to tokenType
                // Every double quote opens or closes a string literal.
                if (buffer == "\"") stringMode = !stringMode
                buffer = ""
            }
        }
        return tokens
    }

    /**
     * Matches the buffered [token] to a [TokenType].
     *
     * Rules are tried top to bottom. A rule on `token + next` returning
     * [TokenType.Skip] means "the lexeme may still grow, keep buffering".
     *
     * @param token the characters buffered so far (never empty when called from [analyze])
     * @param next the character following [token] in the input
     * @param stringMode whether the scanner is currently inside a string literal
     * @return the type of [token], or [TokenType.Skip] if it is not a complete token yet
     * @throws UnknownType if [token] is longer than one character, contains a
     *   space, matched no other rule and [stringMode] is `false`
     */
    private fun getTokenType(token: String, next: Char, stringMode: Boolean): TokenType {
        val lookahead = token + next
        return when {
            // A word that continues with another letter is not finished yet. This
            // has to come before the keyword test so that an identifier merely
            // starting with a keyword ("printer") is not split into "print" + "er".
            lookahead.matches(letters) -> TokenType.Skip
            lookahead in keyword -> TokenType.Skip
            token in keyword -> TokenType.Keyword
            // Two-character comparison operators ("==", "<=", "!=", ...) win over
            // their one-character prefixes ("=", "<", "!").
            lookahead in comparison -> TokenType.Skip
            token in assignment -> TokenType.Assignment
            // Compound assignments ("+=", ...) win over the arithmetic operator.
            lookahead in assignment -> TokenType.Skip
            token in arithmetic -> TokenType.Arithmetic
            token in comparison -> TokenType.Comparison
            token in logical -> TokenType.Logical
            token.matches(letters) -> TokenType.Identifier
            lookahead.matches(digits) -> TokenType.Skip
            token.matches(digits) -> TokenType.Constant
            token in punctuation -> TokenType.Punctuation
            token in brackets -> TokenType.Bracket
            token in classifier -> TokenType.Classifier
            token.contains(" ") && token.length > 1 && !stringMode -> throw UnknownType(token)
            token == " " && !stringMode -> TokenType.Empty
            // Nothing matched (yet), e.g. the first '&' of "&&": keep buffering.
            else -> TokenType.Skip
        }
    }
}

/**
 * Categories a lexeme can be assigned by [Lexical].
 */
enum class TokenType {
    /** Reserved word, e.g. `print`. */
    Keyword,

    /** Assignment operator, e.g. `=` or `+=`. */
    Assignment,

    /** Arithmetic operator, e.g. `+` or `%`. */
    Arithmetic,

    /** Comparison operator, e.g. `==` or `<=`. */
    Comparison,

    /** Logical operator, e.g. `&&` or `!`. */
    Logical,

    /** Run of ASCII letters that is not a keyword. */
    Identifier,

    /** Run of ASCII digits. */
    Constant,

    /** One of `,`, `:` or `.`. */
    Punctuation,

    /** Round, square or curly bracket. */
    Bracket,

    /** Literal delimiter; currently only the double quote. */
    Classifier,

    /** A single space outside of a string literal. */
    Empty,

    /** Internal marker: the buffer is not a complete token yet; never emitted. */
    Skip,
}