aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marvin Borner 2019-08-12 18:19:44 +0200
committer Marvin Borner 2019-08-12 18:19:44 +0200
commit bf5882f74f79a15dc5688e813e7d42ae62a44263 (patch)
tree 6ff882c018fa102c0baeabcae24053a51cdf3bb8
parent a382841dc7d554b2ba3920d52f73e4cec428743e (diff)
Rewritten lexical analyser
-rw-r--r-- src/Lexical.kt 85
-rw-r--r-- src/Loader.kt 2
2 files changed, 46 insertions, 41 deletions
diff --git a/src/Lexical.kt b/src/Lexical.kt
index bd8bc3a..7b92755 100644
--- a/src/Lexical.kt
+++ b/src/Lexical.kt
@@ -3,12 +3,10 @@ class Lexical {
var buffer = ""
val tokens = mutableListOf<Pair<String, TokenType>>()
for (i in source.indices) {
- if (source.length > i + 1 && getType(source[i]) == getType(source[i + 1])) {
- buffer += source[i]
- } else {
- buffer += source[i]
- if (getType(source[i]) != Type.Empty)
- tokens += buffer to getTokenType(buffer to getType(source[i]))
+ buffer += source[i]
+ val tokenType = getTokenType(buffer, if (source.length > i + 1) source[i + 1] else ' ')
+ if (tokenType != TokenType.Skip) {
+ tokens += buffer to tokenType
buffer = ""
}
}
@@ -16,49 +14,56 @@ class Lexical {
}
/**
- * Matches the characters to groups ([Type])
- */
- private fun getType(char: Char): Type {
- return when (char) {
- in 'a'..'z', in 'A'..'Z' -> Type.Alphabetic
- in '1'..'9' -> Type.Numeric
- ' ' -> Type.Empty
- else -> Type.Unknown
- }
- }
-
- /**
* Matches the tokens to a [TokenType]
*/
- private fun getTokenType(token: Pair<String, Type>): TokenType {
- var type = when (token.first) {
- "print" -> TokenType.Keyword // TODO: DataType matching
- "=", "+", "-", "*", "/", "==", ">", "<" -> TokenType.Operator
- ";", ":" -> TokenType.Symbol
- else -> TokenType.Unknown
+ private fun getTokenType(token: String, next: Char): TokenType { // token: text buffered so far; next: following source char (' ' at end of input)
+ return when { // branches are tried top to bottom; Skip tells the caller to keep buffering
+ token + next in keyword -> TokenType.Skip
+ token in keyword && next !in 'a'..'z' && next !in 'A'..'Z' -> TokenType.Keyword // a keyword must not be the prefix of a longer identifier
+
+ token + next in comparison -> TokenType.Skip // "=" may still grow into "=="
+ token in assignment -> TokenType.Assignment
+
+ token + next in assignment -> TokenType.Skip // "+" may still grow into "+="
+ token in arithmetic -> TokenType.Arithmetic
+
+ token + next in comparison -> TokenType.Skip // "<" may still grow into "<=" (already caught by the first comparison look-ahead)
+ token in comparison -> TokenType.Comparison
+
+ token + next in comparison -> TokenType.Skip // "!" may still grow into "!=" (already caught by the first comparison look-ahead)
+ token in logical -> TokenType.Logical
+
+ (token + next).matches(Regex("[a-zA-Z]*")) -> TokenType.Skip // the identifier continues with the next character
+ token.matches(Regex("[a-zA-Z]*")) -> TokenType.Identifier
+
+ (token + next).matches(Regex("[0-9]*")) -> TokenType.Skip // the number continues with the next character
+ token.matches(Regex("[0-9]*")) -> TokenType.Constant
+
+ token in punctuation -> TokenType.Punctuation
+
+ token == " " -> TokenType.Empty
+
+ else -> TokenType.Skip // also reached for text no rule recognises; NOTE(review): the caller keeps such text in its buffer — confirm this is intended
}
- if (type == TokenType.Unknown)
- type = when (token.second) {
- Type.Numeric -> TokenType.Constant
- Type.Alphabetic -> TokenType.Identifier
- else -> TokenType.Unknown
- }
- return type
}
-}
-private enum class Type {
- Alphabetic,
- Numeric,
- Empty,
- Unknown
+ private val keyword = listOf("print") // TODO: DataType matching
+ private val assignment = listOf("=", "+=", "-=", "*=", "/=") // plain and compound assignment operators
+ private val arithmetic = listOf("+", "-", "*", "/", "%")
+ private val comparison = listOf("==", "!=", "<", "<=", ">", ">=")
+ private val logical = listOf("&&", "||", "!")
+ private val punctuation = listOf(",", ":", ".") // NOTE(review): ";" was a Symbol before this change and is not listed here — confirm that is intended
}
enum class TokenType {
Keyword,
+ Assignment, // token listed in the lexer's `assignment` table
+ Arithmetic, // token listed in the lexer's `arithmetic` table
+ Comparison, // token listed in the lexer's `comparison` table
+ Logical, // token listed in the lexer's `logical` table
+ Punctuation, // token listed in the lexer's `punctuation` table
Identifier,
- Operator,
Constant,
- Symbol,
- Unknown
+ Empty, // the token is a single space character
+ Skip // token is incomplete or unrecognised; the lexer loop does not emit it and keeps buffering
} \ No newline at end of file
diff --git a/src/Loader.kt b/src/Loader.kt
index 2161c51..1972939 100644
--- a/src/Loader.kt
+++ b/src/Loader.kt
@@ -5,7 +5,7 @@ class Loader(path: String) {
private val inputStream: InputStream = File(path).inputStream()
private val inputString = inputStream.bufferedReader().use { it.readText() }
- // TODO: Add preprocessor managing imports
+ // TODO: Add preprocessor managing imports and comments
fun load(): String {
return inputString
}