Skip to content

Commit 1fb7726

Browse files
refactor: introduced LexerCommons and wip: fixing test cases
1 parent 2128a42 commit 1fb7726

34 files changed

+1023
-180
lines changed

core/src/main/kotlin/io/github/cybercodernaj/parkour/lexer/LexerBuilder.kt

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
package io.github.cybercodernaj.parkour.lexer
22

33
import io.github.cybercodernaj.parkour.lexer.internal.Lexer
4-
import io.github.cybercodernaj.parkour.exceptions.LexicalException
54

65
/**
76
* A helper class to create the [Lexer].
87
* Contains functions to be used as part of the [lexer] DSL.
9-
* Each property's default value is detailed in [Lexer.Defaults].
8+
* Each property's default value is detailed in [LexerCommons].
109
*
1110
* @author Nishant Aanjaney Jalan
1211
* @since 0.2.0
@@ -24,11 +23,10 @@ class LexerBuilder internal constructor() {
2423
* }
2524
* ```
2625
*
27-
* @see Lexer.Defaults.ignorePattern
2826
* @author Nishant Aanjaney Jalan
2927
* @since 0.2.0
3028
*/
31-
var ignorePattern: Regex = Lexer.Defaults.ignorePattern
29+
var ignorePattern: Regex? = null
3230

3331
/**
3432
* When the lexer identifies a [singleLineComments] pattern, it will skip to the next line
@@ -42,11 +40,10 @@ class LexerBuilder internal constructor() {
4240
* }
4341
* ```
4442
*
45-
* @see Lexer.Defaults.singleLineComments
4643
* @author Nishant Aanjaney Jalan
4744
* @since 0.2.0
4845
*/
49-
var singleLineComments: Regex? = Lexer.Defaults.singleLineComments
46+
var singleLineComments: Regex? = null
5047

5148
/**
5249
* There are two parts to [multilineComments]: the starting and the ending pattern.
@@ -62,11 +59,10 @@ class LexerBuilder internal constructor() {
6259
* }
6360
* ```
6461
*
65-
* @see Lexer.Defaults.multilineComments
6662
* @author Nishant Aanjaney Jalan
6763
* @since 0.2.0
6864
*/
69-
var multilineComments: Pair<Regex, Regex>? = Lexer.Defaults.multilineComments
65+
var multilineComments: Pair<Regex, Regex>? = null
7066

7167
/**
7268
* The regex pattern that defines the rules for identifiers.
@@ -82,11 +78,11 @@ class LexerBuilder internal constructor() {
8278
* }
8379
* ```
8480
*
85-
* @see Lexer.Defaults.identifiers
81+
* @see LexerCommons.identifiers
8682
* @author Nishant Aanjaney Jalan
8783
* @since 0.2.0
8884
*/
89-
var identifiers: Regex = Lexer.Defaults.identifiers
85+
var identifiers: Regex? = null
9086

9187
private val _hardKeywords: MutableList<String> = mutableListOf()
9288
internal val hardKeywords: List<String> get() = _hardKeywords
@@ -100,6 +96,7 @@ class LexerBuilder internal constructor() {
10096
/**
10197
* The regex that detects and extracts integer literals from the string.
10298
* This should usually only consider strict integers.
99+
* This configuration is required only if your integer pattern is different from [LexerCommons.integerLiteral]
103100
*
104101
* ### Usage
105102
*
@@ -109,15 +106,16 @@ class LexerBuilder internal constructor() {
109106
* }
110107
* ```
111108
*
112-
* @see Lexer.Defaults.integerLiteral
109+
* @see LexerCommons.integerLiteral
113110
* @author Nishant Aanjaney Jalan
114111
* @since 0.2.0
115112
*/
116-
var integerLiteral: Regex = Lexer.Defaults.integerLiteral
113+
var integerLiteral: Regex = LexerCommons.integerLiteral
117114

118115
/**
119116
* The regex that detects and extracts floating point literals from the string.
120117
* This should usually only consider numbers that have a decimal point.
118+
* This configuration is required only if your floating-point pattern is different from [LexerCommons.floatingLiteral].
121119
*
122120
* ### Usage
123121
*
@@ -127,29 +125,30 @@ class LexerBuilder internal constructor() {
127125
* }
128126
* ```
129127
*
130-
* @see Lexer.Defaults.floatingLiteral
128+
* @see LexerCommons.floatingLiteral
131129
* @author Nishant Aanjaney Jalan
132130
* @since 0.2.0
133131
*/
134-
var floatingLiteral: Regex = Lexer.Defaults.floatingLiteral
132+
var floatingLiteral: Regex = LexerCommons.floatingLiteral
135133

136134
/**
137135
* The enclosing strings that should denote the start and end of single-line strings.
138136
* The lexer will throw a [LexicalException] when a string literal is not terminated in the same line.
137+
* This configuration is required only if your string-delimiting patterns are different from [LexerCommons.singleLineString].
139138
*
140139
* ### Usage
141140
*
142141
* ```kt
143142
* val myLexer = lexer {
144-
* floatingLiteral = Regex("[-+]?[0-9_]*\.[0-9_]+(?:[eE][-+]?[0-9_]+)?")
143+
* singleLineString = setOf("'", "\"")
145144
* }
146145
* ```
147146
*
148-
* @see Lexer.Defaults.singleLineString
147+
* @see LexerCommons.singleLineString
149148
* @author Nishant Aanjaney Jalan
150149
* @since 0.2.0
151150
*/
152-
var singleLineString: Set<String> = Lexer.Defaults.singleLineString
151+
var singleLineString: Set<String> = LexerCommons.singleLineString
153152

154153
/**
155154
* Hard keywords are characters and symbols that give a particular meaning to a program.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package io.github.cybercodernaj.parkour.lexer
2+
3+
/**
4+
* A list of common patterns and lists of items that most programming languages and
5+
* data serialization formats use.
6+
*
7+
* @author Nishant Aanjaney Jalan
8+
* @since 0.2.0
9+
*/
10+
object LexerCommons {
11+
val identifiers = Regex("""[a-zA-Z_]\w*""")
12+
val integerLiteral = Regex("""[-+]?[0-9_]+""")
13+
val floatingLiteral = Regex("""[-+]?[0-9_]*\.[0-9_]+(?:[eE][-+]?[0-9_]+)?""")
14+
val singleLineString: Set<String> = setOf("\"", "\'")
15+
}

core/src/main/kotlin/io/github/cybercodernaj/parkour/exceptions/LexicalException.kt renamed to core/src/main/kotlin/io/github/cybercodernaj/parkour/lexer/LexicalException.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package io.github.cybercodernaj.parkour.exceptions
1+
package io.github.cybercodernaj.parkour.lexer
22

33
/**
44
* This is thrown when there is an error when trying to tokenize a string.

core/src/main/kotlin/io/github/cybercodernaj/parkour/lexer/internal/Lexer.kt

Lines changed: 23 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
package io.github.cybercodernaj.parkour.lexer.internal
22

33
import io.github.cybercodernaj.parkour.datasource.TextSource
4-
import io.github.cybercodernaj.parkour.exceptions.LexicalException
54
import io.github.cybercodernaj.parkour.lexer.LexerBuilder
5+
import io.github.cybercodernaj.parkour.lexer.LexerCommons
6+
import io.github.cybercodernaj.parkour.lexer.LexicalException
67
import io.github.cybercodernaj.parkour.utils.Position
78

89
/**
@@ -12,42 +13,18 @@ import io.github.cybercodernaj.parkour.utils.Position
1213
* @since 0.1.0
1314
*/
1415
class Lexer internal constructor(
15-
private val ignorePattern: Regex = Defaults.ignorePattern,
16-
private val singleLineComments: Regex? = Defaults.singleLineComments,
17-
private val multilineComments: Pair<Regex, Regex>? = Defaults.multilineComments,
18-
private val identifiers: Regex = Defaults.identifiers,
16+
private val ignorePattern: Regex? = Regex("""\s+"""),
17+
private val singleLineComments: Regex? = null,
18+
private val multilineComments: Pair<Regex, Regex>? = null,
19+
private val identifiers: Regex? = Regex("""[a-zA-Z_]\w*"""),
1920
private val hardKeywords: List<String> = emptyList(),
2021
private val operators: List<String> = emptyList(),
2122
private val separators: List<String> = emptyList(),
22-
private val integerLiteral: Regex? = Defaults.integerLiteral,
23-
private val floatingLiteral: Regex? = Defaults.floatingLiteral,
24-
private val singleLineString: Set<String> = Defaults.singleLineString,
25-
private val escapeSequences: List<Pair<Regex, (String) -> Char>> = Defaults.escapeSequences,
23+
private val integerLiteral: Regex? = LexerCommons.integerLiteral,
24+
private val floatingLiteral: Regex? = LexerCommons.floatingLiteral,
25+
private val singleLineString: Set<String> = LexerCommons.singleLineString,
26+
private val escapeSequences: List<Pair<Regex, (String) -> Char>> = listOf(),
2627
) {
27-
/**
28-
* A list of common patterns and lists of items that most programming languages and
29-
* data serialization formats.
30-
*
31-
* @author Nishant Aanjaney Jalan
32-
* @since 0.2.0
33-
*/
34-
object Defaults {
35-
/**
36-
* The lexer will skip over any strings that match this regex.
37-
* This acts like a token separator.
38-
*
39-
* @author Nishant Aanjaney Jalan
40-
* @since 0.2.0
41-
*/
42-
val ignorePattern = Regex("""\s+""")
43-
val singleLineComments: Regex? = null
44-
val multilineComments: Pair<Regex, Regex>? = null
45-
val identifiers = Regex("""[a-zA-Z_]\w*""")
46-
val integerLiteral = Regex("""[-+]?[0-9_]+""")
47-
val floatingLiteral = Regex("""[-+]?[0-9_]*\.[0-9_]+(?:[eE][-+]?[0-9_]+)?""")
48-
val singleLineString: Set<String> = setOf("\"", "\'")
49-
val escapeSequences: List<Pair<Regex, (String) -> Char>> = emptyList() // TODO fill this list
50-
}
5128

5229
private var position = Position(0, 0)
5330

@@ -62,9 +39,6 @@ class Lexer internal constructor(
6239

6340
private var insideMultilineComment = false
6441

65-
private val _separators = separators.sortedByDescending(String::length)
66-
private val _operators = operators.sortedByDescending(String::length)
67-
6842
/**
6943
* Fetches the next [Token] from the source
7044
*
@@ -115,10 +89,11 @@ class Lexer internal constructor(
11589
}
11690
}
11791

118-
(position pointsAt ignorePattern)
119-
?.let { match ->
120-
position = position.copy(col = match.range.last + 1)
121-
}
92+
val ignoreMatch = position pointsAt ignorePattern
93+
if (ignoreMatch != null) {
94+
position = position.copy(col = ignoreMatch.range.last + 1)
95+
continue
96+
}
12297

12398
(position pointsAtSome hardKeywords)
12499
?.let { keyword ->
@@ -133,14 +108,14 @@ class Lexer internal constructor(
133108
position = token.end + 1
134109
}
135110

136-
(position pointsAtSome _operators)
111+
(position pointsAtSome operators)
137112
?.let { keyword ->
138113
val end = position.copy(col = position.col + keyword.length - 1)
139114
tokenStream.addOperator(position, end)
140115
position = end + 1
141116
}
142117

143-
(position pointsAtSome _separators)
118+
(position pointsAtSome separators)
144119
?.let { keyword ->
145120
val end = position.copy(col = position.col + keyword.length - 1)
146121
tokenStream.addSeparator(position, end)
@@ -300,5 +275,11 @@ class Lexer internal constructor(
300275
private infix fun Position.pointsAt(pattern: String): Boolean {
301276
return currentLine.startsWith(pattern, startIndex = this.col)
302277
}
278+
279+
fun resetPosition() {
280+
position = Position(0, 0)
281+
tokenIndex = 0
282+
tokenStream = emptyList()
283+
}
303284
}
304285

core/src/test/kotlin/io/github/cybercodernaj/parkour/lexer/LexerBuilderTest.kt

Lines changed: 18 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,34 @@ import io.github.cybercodernaj.parkour.datasource.StringSource
44
import io.github.cybercodernaj.parkour.lexer.internal.Token
55
import io.github.cybercodernaj.parkour.testutils.assertTokens
66
import io.github.cybercodernaj.parkour.utils.Position
7+
import org.junit.jupiter.api.BeforeEach
78
import org.junit.jupiter.api.Test
89

910
class LexerBuilderTest {
11+
private val myLexer = lexer {
12+
ignorePattern = Regex("[. ]+")
13+
singleLineComments = Regex("//")
14+
multilineComments = Regex("/\\*") to Regex("\\*/")
15+
identifiers = Regex("[a-z][a-zA-Z0-9]+")
16+
17+
hardKeywords("val", "var")
18+
separators("(", ")", "<", ">", ",", ".")
19+
hardKeywords("val")
20+
operators("*", "**", "/", "+", "-", "=", "==")
21+
}
22+
1023
@Test
1124
fun `initialise a default lexer`() {
1225
lexer {}
1326
}
1427

28+
@BeforeEach
29+
fun setupEach() {
30+
myLexer.resetPosition()
31+
}
32+
1533
@Test
1634
fun `sets ignore patterns`() {
17-
val myLexer = lexer {
18-
ignorePattern = Regex("\\.")
19-
}
20-
2135
myLexer.source = StringSource("hi.hello")
2236
assertTokens(
2337
myLexer,
@@ -30,10 +44,6 @@ class LexerBuilderTest {
3044

3145
@Test
3246
fun `sets single line comments`() {
33-
val myLexer = lexer {
34-
singleLineComments = Regex("//")
35-
}
36-
3747
myLexer.source = StringSource("hi // hello\nhru")
3848
assertTokens(
3949
myLexer,
@@ -46,10 +56,6 @@ class LexerBuilderTest {
4656

4757
@Test
4858
fun `sets multiline comments`() {
49-
val myLexer = lexer {
50-
multilineComments = Regex("/\\*") to Regex("\\*/")
51-
}
52-
5359
myLexer.source = StringSource("hi /* hello */\nhru")
5460
assertTokens(
5561
myLexer,
@@ -62,10 +68,6 @@ class LexerBuilderTest {
6268

6369
@Test
6470
fun `sets identifiers rule`() {
65-
val myLexer = lexer {
66-
identifiers = Regex("[a-z][a-zA-Z0-9]+")
67-
}
68-
6971
myLexer.source = StringSource("hi value")
7072
assertTokens(
7173
myLexer,
@@ -78,10 +80,6 @@ class LexerBuilderTest {
7880

7981
@Test
8082
fun `sets hard keywords`() {
81-
val myLexer = lexer {
82-
hardKeywords("val", "var")
83-
}
84-
8583
myLexer.source = StringSource("val c")
8684
assertTokens(
8785
myLexer,
@@ -94,11 +92,6 @@ class LexerBuilderTest {
9492

9593
@Test
9694
fun `set separators`() {
97-
val myLexer = lexer {
98-
separators("(", ")", "<", ">", ",", ".")
99-
}
100-
101-
10295
myLexer.source = StringSource("Array<List<Set<Int>>>")
10396

10497
assertTokens(
@@ -119,11 +112,6 @@ class LexerBuilderTest {
119112

120113
@Test
121114
fun `set operators`() {
122-
val myLexer = lexer {
123-
hardKeywords("val")
124-
operators("*", "**", "/", "//", "+", "-", "=", "==")
125-
}
126-
127115
myLexer.source = StringSource("val diff = new - old")
128116

129117
assertTokens(

core/src/test/kotlin/io/github/cybercodernaj/parkour/lexer/internal/LexerIdentifierTest.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package io.github.cybercodernaj.parkour.lexer.internal
22

33
import io.github.cybercodernaj.parkour.datasource.StringSource
4-
import io.github.cybercodernaj.parkour.exceptions.LexicalException
4+
import io.github.cybercodernaj.parkour.lexer.LexicalException
55
import io.github.cybercodernaj.parkour.utils.Position
66
import org.junit.jupiter.api.Test
77
import org.junit.jupiter.api.fail

core/src/test/kotlin/io/github/cybercodernaj/parkour/lexer/internal/LexerLiteralTest.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package io.github.cybercodernaj.parkour.lexer.internal
22

33
import io.github.cybercodernaj.parkour.datasource.StringSource
4-
import io.github.cybercodernaj.parkour.exceptions.LexicalException
4+
import io.github.cybercodernaj.parkour.lexer.LexicalException
55
import io.github.cybercodernaj.parkour.testutils.assertTokens
66
import io.github.cybercodernaj.parkour.utils.Position
77
import org.junit.jupiter.api.Test

0 commit comments

Comments
 (0)