diff options
Diffstat (limited to 'src/Lexical.kt')
-rw-r--r-- | src/Lexical.kt | 64 |
1 files changed, 64 insertions, 0 deletions
// src/Lexical.kt

/**
 * A minimal lexical analyzer that splits source text into classified tokens.
 *
 * Tokenization is run-based: consecutive characters that belong to the same
 * character class ([Type]) are merged into one lexeme. As a consequence, a run
 * of adjacent punctuation characters (for example `";;"` or `"=-"`) forms a
 * single lexeme and is only recognised if the whole run is a known operator
 * or symbol.
 */
class Lexical {
    /**
     * Splits [source] into tokens.
     *
     * Whitespace separates tokens and is never emitted itself.
     *
     * @param source the text to tokenize; may be empty.
     * @return the lexemes in source order, each paired with its [TokenType].
     *   The list is empty when [source] is empty or contains only whitespace.
     */
    fun analyze(source: String): MutableList<Pair<String, TokenType>> {
        val tokens = mutableListOf<Pair<String, TokenType>>()
        var start = 0
        while (start < source.length) {
            val runType = getType(source[start])

            // Extend the run while the following characters share the same class.
            var end = start + 1
            while (end < source.length && getType(source[end]) == runType) {
                end++
            }

            if (runType != Type.Empty) {
                val lexeme = source.substring(start, end)
                tokens += lexeme to getTokenType(lexeme to runType)
            }
            start = end
        }
        return tokens
    }

    /**
     * Matches the characters to groups ([Type]).
     *
     * ASCII letters are [Type.Alphabetic], the digits `0`-`9` are
     * [Type.Numeric], any whitespace character (space, tab, line break, ...)
     * is [Type.Empty], and everything else is [Type.Unknown].
     */
    private fun getType(char: Char): Type = when {
        char in 'a'..'z' || char in 'A'..'Z' -> Type.Alphabetic
        // The range must start at '0'; otherwise "10" is split into "1" and "0".
        char in '0'..'9' -> Type.Numeric
        // All whitespace separates tokens, not only the plain space character.
        char.isWhitespace() -> Type.Empty
        else -> Type.Unknown
    }

    /**
     * Matches the tokens to a [TokenType].
     *
     * The lexeme text is checked against the fixed keyword, operator and
     * symbol tables first; if it matches none of them, the type is derived
     * from the character class of the run.
     *
     * @param token the lexeme text paired with the [Type] of its characters.
     */
    private fun getTokenType(token: Pair<String, Type>): TokenType {
        val (lexeme, charType) = token
        return when (lexeme) {
            "print" -> TokenType.Keyword // TODO: DataType matching
            "=", "+", "-", "*", "/", "==", ">", "<" -> TokenType.Operator
            ";", ":" -> TokenType.Symbol
            else -> when (charType) {
                Type.Numeric -> TokenType.Constant
                Type.Alphabetic -> TokenType.Identifier
                Type.Empty, Type.Unknown -> TokenType.Unknown
            }
        }
    }
}

/** Character classes used to group adjacent characters into one lexeme. */
private enum class Type {
    Alphabetic,
    Numeric,
    Empty,
    Unknown
}

/** Classification assigned to each lexeme produced by [Lexical.analyze]. */
enum class TokenType {
    Keyword,
    Identifier,
    Operator,
    Constant,
    Symbol,
    Unknown
}
\ No newline at end of file |