/*
 * src/Lexical.kt
 *
 * Recovered from the patch "Basic parser functions"
 * (commit a382841dc7d554b2ba3920d52f73e4cec428743e, Marvin Borner, Mon, 12 Aug 2019 16:01:51 +0200),
 * which created this file.
 */

/**
 * A minimal lexer that splits source text into `(text, TokenType)` pairs.
 *
 * Characters are first sorted into coarse groups ([Type]); every maximal run of
 * adjacent characters belonging to the same group becomes one token. Runs of
 * whitespace separate tokens and are never emitted themselves.
 */
class Lexical {
    /**
     * Tokenizes [source].
     *
     * Because tokens are formed from runs of same-group characters, adjacent
     * punctuation is merged into a single token: `==` becomes one
     * [TokenType.Operator], while an unrecognised combination such as `;=`
     * becomes one [TokenType.Unknown] token.
     *
     * @param source the text to tokenize; may be empty.
     * @return the tokens in source order, each paired with its [TokenType].
     *         Empty when [source] is empty or contains only whitespace.
     */
    fun analyze(source: String): MutableList<Pair<String, TokenType>> {
        val tokens = mutableListOf<Pair<String, TokenType>>()
        var runStart = 0
        for (i in source.indices) {
            val group = getType(source[i])
            val runContinues = i + 1 < source.length && getType(source[i + 1]) == group
            if (runContinues) continue

            // The run ends at index i: emit it unless it is whitespace.
            if (group != Type.Empty) {
                val text = source.substring(runStart, i + 1)
                tokens += text to getTokenType(text to group)
            }
            runStart = i + 1
        }
        return tokens
    }

    /**
     * Matches a character to its group ([Type]).
     *
     * ASCII letters are [Type.Alphabetic], the digits `0`-`9` are
     * [Type.Numeric], any whitespace character is [Type.Empty], and everything
     * else is [Type.Unknown].
     */
    private fun getType(char: Char): Type = when {
        char in 'a'..'z' || char in 'A'..'Z' -> Type.Alphabetic
        // The range starts at '0' so that numbers such as "10" stay in one token.
        char in '0'..'9' -> Type.Numeric
        // Tabs and line breaks separate tokens just like spaces do.
        char.isWhitespace() -> Type.Empty
        else -> Type.Unknown
    }

    /**
     * Matches a token to a [TokenType].
     *
     * The token text is checked against the known keywords, operators and
     * symbols first; if none matches, the type is derived from the character
     * group the token was built from.
     *
     * @param token the token text paired with the [Type] of its characters.
     */
    private fun getTokenType(token: Pair<String, Type>): TokenType {
        val (text, group) = token
        return when (text) {
            "print" -> TokenType.Keyword // TODO: DataType matching
            "=", "+", "-", "*", "/", "==", ">", "<" -> TokenType.Operator
            ";", ":" -> TokenType.Symbol
            else -> when (group) {
                Type.Numeric -> TokenType.Constant
                Type.Alphabetic -> TokenType.Identifier
                Type.Empty, Type.Unknown -> TokenType.Unknown
            }
        }
    }
}

/** Coarse character groups used to split the source into runs. */
private enum class Type {
    Alphabetic,
    Numeric,
    Empty,
    Unknown
}

/** Classification assigned to each token produced by [Lexical.analyze]. */
enum class TokenType {
    Keyword,
    Identifier,
    Operator,
    Constant,
    Symbol,
    Unknown
}