From a382841dc7d554b2ba3920d52f73e4cec428743e Mon Sep 17 00:00:00 2001
From: Marvin Borner
Date: Mon, 12 Aug 2019 16:01:51 +0200
Subject: Basic parser functions

---
 .gitignore       |  3 +++
 example.run      |  1 +
 src/DataTypes.kt |  3 +++
 src/Lexical.kt   | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/Loader.kt    | 12 +++++++++++
 src/Main.kt      |  5 +++++
 src/Testing.kt   | 13 ++++++++++++
 7 files changed, 101 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 example.run
 create mode 100644 src/DataTypes.kt
 create mode 100644 src/Lexical.kt
 create mode 100644 src/Loader.kt
 create mode 100644 src/Main.kt
 create mode 100644 src/Testing.kt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5f05927
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.idea
+out
+*.iml
\ No newline at end of file
diff --git a/example.run b/example.run
new file mode 100644
index 0000000..f7d418f
--- /dev/null
+++ b/example.run
@@ -0,0 +1 @@
+print string(2 + 2);
\ No newline at end of file
diff --git a/src/DataTypes.kt b/src/DataTypes.kt
new file mode 100644
index 0000000..290481d
--- /dev/null
+++ b/src/DataTypes.kt
@@ -0,0 +1,3 @@
+class Int {
+
+}
\ No newline at end of file
diff --git a/src/Lexical.kt b/src/Lexical.kt
new file mode 100644
index 0000000..bd8bc3a
--- /dev/null
+++ b/src/Lexical.kt
@@ -0,0 +1,64 @@
+class Lexical {
+    fun analyze(source: String): MutableList<Pair<String, TokenType>> {
+        var buffer = ""
+        val tokens = mutableListOf<Pair<String, TokenType>>()
+        for (i in source.indices) {
+            if (source.length > i + 1 && getType(source[i]) == getType(source[i + 1])) {
+                buffer += source[i]
+            } else {
+                buffer += source[i]
+                if (getType(source[i]) != Type.Empty)
+                    tokens += buffer to getTokenType(buffer to getType(source[i]))
+                buffer = ""
+            }
+        }
+        return tokens
+    }
+
+    /**
+     * Matches the characters to groups ([Type])
+     */
+    private fun getType(char: Char): Type {
+        return when (char) {
+            in 'a'..'z', in 'A'..'Z' -> Type.Alphabetic
+            in '1'..'9' -> Type.Numeric
+            ' ' -> Type.Empty
+            else -> Type.Unknown
+        }
+    }
+
+    /**
+     * Matches the tokens to a [TokenType]
+     */
+    private fun getTokenType(token: Pair<String, Type>): TokenType {
+        var type = when (token.first) {
+            "print" -> TokenType.Keyword // TODO: DataType matching
+            "=", "+", "-", "*", "/", "==", ">", "<" -> TokenType.Operator
+            ";", ":" -> TokenType.Symbol
+            else -> TokenType.Unknown
+        }
+        if (type == TokenType.Unknown)
+            type = when (token.second) {
+                Type.Numeric -> TokenType.Constant
+                Type.Alphabetic -> TokenType.Identifier
+                else -> TokenType.Unknown
+            }
+        return type
+    }
+}
+
+private enum class Type {
+    Alphabetic,
+    Numeric,
+    Empty,
+    Unknown
+}
+
+enum class TokenType {
+    Keyword,
+    Identifier,
+    Operator,
+    Constant,
+    Symbol,
+    Unknown
+}
\ No newline at end of file
diff --git a/src/Loader.kt b/src/Loader.kt
new file mode 100644
index 0000000..2161c51
--- /dev/null
+++ b/src/Loader.kt
@@ -0,0 +1,12 @@
+import java.io.File
+import java.io.InputStream
+
+class Loader(path: String) {
+    private val inputStream: InputStream = File(path).inputStream()
+    private val inputString = inputStream.bufferedReader().use { it.readText() }
+
+    // TODO: Add preprocessor managing imports
+    fun load(): String {
+        return inputString
+    }
+}
\ No newline at end of file
diff --git a/src/Main.kt b/src/Main.kt
new file mode 100644
index 0000000..24a2e08
--- /dev/null
+++ b/src/Main.kt
@@ -0,0 +1,5 @@
+import Testing
+
+fun main() {
+    Testing()
+}
diff --git a/src/Testing.kt b/src/Testing.kt
new file mode 100644
index 0000000..0b16145
--- /dev/null
+++ b/src/Testing.kt
@@ -0,0 +1,13 @@
+class Testing {
+    init {
+        val source = Loader("/home/melvin/coding/run/example.run").load()
+        val tokens = Lexical().analyze(source)
+        for (token in tokens) {
+            print(token.first)
+            print("\n")
+            print(token.second)
+            print("\n")
+        }
+        print(source)
+    }
+}
\ No newline at end of file
--
cgit v1.2.3