aboutsummaryrefslogtreecommitdiff
path: root/src/Lexical.kt
blob: bd8bc3af39a1b5a820b0158f7f37cf42cf37c73d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/**
 * A minimal lexer that splits source text into `(text, TokenType)` pairs.
 *
 * The input is scanned left to right and cut into maximal runs of characters
 * that share the same character group (letters, digits, whitespace, or anything
 * else). Whitespace runs are dropped; every other run becomes one token.
 *
 * Note that adjacent punctuation characters form a single run. This is what lets
 * `==` be recognised as one operator, but it also means a sequence such as `=;`
 * is emitted as one token of type [TokenType.Unknown].
 */
class Lexical {
    /**
     * Tokenizes [source].
     *
     * @param source the text to tokenize; may be empty.
     * @return the tokens in source order, each paired with its [TokenType].
     *   Whitespace never appears in the result. An empty or all-whitespace
     *   input yields an empty list.
     */
    fun analyze(source: String): MutableList<Pair<String, TokenType>> {
        val tokens = mutableListOf<Pair<String, TokenType>>()
        var start = 0
        while (start < source.length) {
            val type = getType(source[start])
            // Extend the run for as long as the following characters belong to the same group.
            var end = start + 1
            while (end < source.length && getType(source[end]) == type) end++
            if (type != Type.Empty) {
                val text = source.substring(start, end)
                tokens += text to getTokenType(text to type)
            }
            start = end
        }
        return tokens
    }

    /**
     * Matches the characters to groups ([Type]).
     *
     * ASCII letters are [Type.Alphabetic], the digits `0`-`9` are [Type.Numeric],
     * any whitespace character (space, tab, line break, ...) is [Type.Empty], and
     * everything else is [Type.Unknown].
     */
    private fun getType(char: Char): Type = when {
        char in 'a'..'z' || char in 'A'..'Z' -> Type.Alphabetic
        // '0' must be included, otherwise numbers such as "10" are split apart.
        char in '0'..'9' -> Type.Numeric
        // All whitespace separates tokens, not only the plain space character.
        char.isWhitespace() -> Type.Empty
        else -> Type.Unknown
    }

    /**
     * Matches the tokens to a [TokenType].
     *
     * The token text is first compared against the fixed keyword, operator and
     * symbol spellings. If none of them match, the type falls back to the
     * character group of the run: numeric runs are constants, alphabetic runs
     * are identifiers, and anything else stays [TokenType.Unknown].
     *
     * @param token the token text paired with the character group of its run.
     */
    private fun getTokenType(token: Pair<String, Type>): TokenType {
        val (text, group) = token
        return when (text) {
            "print" -> TokenType.Keyword // TODO: DataType matching
            "=", "+", "-", "*", "/", "==", ">", "<" -> TokenType.Operator
            ";", ":" -> TokenType.Symbol
            else -> when (group) {
                Type.Numeric -> TokenType.Constant
                Type.Alphabetic -> TokenType.Identifier
                Type.Empty, Type.Unknown -> TokenType.Unknown
            }
        }
    }
}

/**
 * Character groups used by [Lexical] to decide where one token ends and the
 * next begins: consecutive characters of the same group form a single run.
 */
private enum class Type {
    /** Letters `a`-`z` and `A`-`Z`. */
    Alphabetic,
    /** Characters the lexer classifies as numeric. */
    Numeric,
    /** Separator characters; runs of this group are dropped instead of becoming tokens. */
    Empty,
    /** Any character that does not fall into one of the other groups. */
    Unknown
}

/**
 * Classification attached to every token returned by [Lexical.analyze].
 */
enum class TokenType {
    /** A reserved word of the language (currently only `print` is matched). */
    Keyword,
    /** An alphabetic run that is not a keyword. */
    Identifier,
    /** One of `=`, `+`, `-`, `*`, `/`, `==`, `>`, `<`. */
    Operator,
    /** A numeric run. */
    Constant,
    /** One of `;` or `:`. */
    Symbol,
    /** Text that matches none of the other categories. */
    Unknown
}