Skip to content

Commit 0c7c0ef

Browse files
refactor: hard keywords, operators and separators
1 parent 084bb9e commit 0c7c0ef

File tree

4 files changed

+103
-33
lines changed

4 files changed

+103
-33
lines changed

core/src/main/kotlin/io/github/cybercodernaj/parkour/lexer/Lexer.kt

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package io.github.cybercodernaj.parkour.lexer
22

3-
import arrow.core.*
3+
import arrow.core.None
4+
import arrow.core.Option
45
import arrow.core.raise.OptionRaise
56
import arrow.core.raise.option
67
import io.github.cybercodernaj.parkour.datasource.TextSource
@@ -38,9 +39,9 @@ class Lexer(
3839
private val singleLineComments: Regex? = null,
3940
private val multilineComments: Pair<Regex, Regex>? = null,
4041
private val identifiers: Regex = Regex("""[a-zA-Z_]\w*"""),
41-
private val hardKeywords: Set<String> = emptySet(),
42-
private val operators: Set<String> = emptySet(),
43-
private val separators: Set<String> = emptySet(),
42+
val hardKeywords: Set<String> = emptySet(),
43+
val operators: Set<String> = emptySet(),
44+
val separators: Set<String> = emptySet(),
4445
private val literals: Literals = Literals()
4546
) {
4647
private var position: Position = Position(0, 0)
@@ -68,15 +69,21 @@ class Lexer(
6869

6970
private var insideMultilineComment = false
7071

71-
private val _hardKeywords = StringTrieMap<Kind>()
72+
private val definitions = StringTrieMap<Kind>()
7273

7374
private enum class Kind {
7475
KEYWORD, OPERATOR, SEPARATOR
7576
}
7677

7778
init {
7879
hardKeywords.forEach {
79-
_hardKeywords[it] = Kind.KEYWORD
80+
definitions[it] = Kind.KEYWORD
81+
}
82+
operators.forEach {
83+
definitions[it] = Kind.OPERATOR
84+
}
85+
separators.forEach {
86+
definitions[it] = Kind.SEPARATOR
8087
}
8188
}
8289

@@ -99,7 +106,9 @@ class Lexer(
99106
// If the file is fully read, then return EOF
100107
currentLine.onNone { return Token.EOF }
101108

102-
val winner = listOf(identifiers()).firstNotNullOfOrNull { it.getOrNull() }
109+
val winner = listOf(definitions(), identifiers())
110+
.mapNotNull { it.getOrNull() }
111+
.maxByOrNull { it.size }
103112

104113
if (winner != null) {
105114
position = position.copy(col = winner.end.col + 1)
@@ -122,6 +131,23 @@ class Lexer(
122131
}
123132
}
124133

134+
/**
 * Tries to read a hard keyword, operator or separator starting at the current [position].
 *
 * The remainder of the current line is looked up in the [definitions] trie and the longest
 * registered entry that prefixes it is turned into the matching [Token] subtype.
 *
 * @return the matched token, or [None] when there is no current line or no definition matches
 */
private fun definitions(): Option<Token> = option {
    val remaining = currentLine.bind().substring(position.col)

    // No registered definition is a prefix of the remaining text: short-circuit to None.
    val (matchedKind, matchedLength) = definitions.getLongest(remaining) ?: raise(None)

    val content = remaining.substring(0, matchedLength)
    // The end position points at the last character of the match, hence the -1.
    val end = position + (matchedLength - 1)

    when (matchedKind) {
        Kind.KEYWORD -> Token.Keyword(content, position, end)
        Kind.OPERATOR -> Token.Operator(content, position, end)
        Kind.SEPARATOR -> Token.Separator(content, position, end)
    }
}
150+
125151
private fun identifiers(): Option<Token.Identifier> = option {
126152
val match = startsWith(identifiers)
127153

@@ -147,9 +173,6 @@ class Lexer(
147173
}
148174
}
149175

150-
private fun <A> Option<A>.getOrThrow(cause: () -> Exception): A {
151-
return getOrNull() ?: throw cause()
152-
}
153176

154177
// private fun updateTokenStream() {
155178
// fetchNextLine()

core/src/main/kotlin/io/github/cybercodernaj/parkour/lexer/StringTrieMap.kt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,29 @@ internal class StringTrieMap<V> {
2424
}
2525
return if (node.terminal) node.value else null
2626
}
27+
28+
/**
 * Walks down the trie along [content] and returns the deepest terminal value reached.
 *
 * The walk stops at the first character that has no matching child, or when [content] is
 * exhausted. The reported length is the depth of the returned terminal node, i.e. the number
 * of leading characters of [content] that make up the matched key. It is NOT the number of
 * characters walked: with keys `val` and `values`, the input `value` walks five characters
 * but matches only `val`, so the length is 3.
 *
 * @param content the line to find the longest match
 * @return the terminal value paired with the length of the matched key. Null if no key is a
 * prefix of [content]
 */
fun getLongest(content: String): Pair<V, Int>? {
    var node = root
    var longest: Pair<V, Int>? = null
    for ((index, elem) in content.withIndex()) {
        // Dead end: nothing deeper can match, so keep whatever terminal was seen last.
        node = node.children[elem] ?: break
        if (node.terminal) {
            // Record the length together with the value so that walking further down a
            // non-terminal path cannot inflate the length of the match.
            longest = node.value!! to (index + 1)
        }
    }
    return longest
}
2750
}
2851

2952
private data class TrieNode<T, U>(

core/src/main/kotlin/io/github/cybercodernaj/parkour/lexer/Token.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ internal sealed class Token(val value: Any, val start: Position, val end: Positi
2323
data object EOF : Token(Any(), Position(-1, -1), Position(-1, -1))
2424

2525
val size: Int
26-
get() = this.end - this.start
26+
get() = this.end - this.start + 1
2727

2828
override fun equals(other: Any?): Boolean {
2929
if (this === other) return true

core/src/test/kotlin/io/github/cybercodernaj/parkour/lexer/LexerKeywordTest.kt

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package io.github.cybercodernaj.parkour.lexer
22

33
import io.github.cybercodernaj.parkour.datasource.StringSource
4+
import io.github.cybercodernaj.parkour.testutils.assertTokens
45
import io.github.cybercodernaj.parkour.utils.Position
56
import org.junit.jupiter.api.Test
67
import kotlin.test.assertEquals
@@ -25,29 +26,31 @@ class LexerKeywordTest {
2526
}
2627

2728
@Test
28-
fun `returns 2 keywords in same line`() {
29-
lexer.source = StringSource("val class")
29+
fun `returns 3 keywords in same line`() {
30+
lexer.source = StringSource("val class var")
3031

31-
val token = lexer.nextToken()
32-
assertEquals(
33-
Token.Keyword(
34-
value = "val",
35-
start = Position(0, 0),
36-
end = Position(0, 2),
37-
soft = false
38-
),
39-
token
40-
)
41-
42-
val token2 = lexer.nextToken()
43-
assertEquals(
44-
Token.Keyword(
45-
value = "class",
46-
start = Position(0, 4),
47-
end = Position(0, 8),
48-
soft = false
49-
),
50-
token2
32+
assertTokens(
33+
lexer,
34+
listOf(
35+
Token.Keyword(
36+
value = "val",
37+
start = Position(0, 0),
38+
end = Position(0, 2),
39+
soft = false
40+
),
41+
Token.Keyword(
42+
value = "class",
43+
start = Position(0, 4),
44+
end = Position(0, 8),
45+
soft = false
46+
),
47+
Token.Keyword(
48+
value = "var",
49+
start = Position(0, 10),
50+
end = Position(0, 12),
51+
soft = false
52+
),
53+
)
5154
)
5255
}
5356

@@ -106,4 +109,25 @@ class LexerKeywordTest {
106109
val token3 = lexer.nextToken()
107110
assertEquals(Token.EOF, token3)
108111
}
112+
113+
@Test
fun `differentiate between keyword and identifier`() {
    lexer.source = StringSource("val value")

    // "val" on its own is expected to lex as a keyword, while "value" only starts with
    // those letters and is expected to come out as a single identifier.
    val expectedKeyword = Token.Keyword(
        value = "val",
        start = Position(0, 0),
        end = Position(0, 2)
    )
    val expectedIdentifier = Token.Identifier(
        value = "value",
        start = Position(0, 4),
        end = Position(0, 8)
    )

    assertTokens(lexer, listOf(expectedKeyword, expectedIdentifier))
}
109133
}

0 commit comments

Comments
 (0)