Skip to content

Commit 28cc428

Browse files
feat: flattened literals into lexer and improved docs
1 parent 8c8d61d commit 28cc428

File tree

20 files changed

+726
-86
lines changed

20 files changed

+726
-86
lines changed

core/src/main/kotlin/io/github/cybercodernaj/parkour/lexer/LexerBuilder.kt

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,24 @@ import io.github.cybercodernaj.parkour.lexer.internal.Lexer
44

55
/**
66
* A helper class to create the [Lexer].
7-
* Contains functions to be used as part of the [lexer] dsl.
7+
* Contains functions to be used as part of the [lexer] DSL.
8+
* Each property's default value is detailed in [Lexer.Defaults].
89
*
910
* @author Nishant Aanjaney Jalan
1011
* @since 0.2.0
1112
*/
1213
class LexerBuilder internal constructor() {
13-
internal var ignorePattern: Regex = Regex("""\s+""")
14+
internal var ignorePattern: Regex = Lexer.Defaults.ignorePattern
1415
private set
1516

1617
/**
1718
* ignorePattern is the pattern the lexer skips over.
1819
* The part of the string that matches this regex will be ignored.
19-
* This acts like a token separator, defaulted to "\s+" regex.
20+
* This acts like a token separator.
2021
*
2122
* @param regex regex of the pattern the lexer will not tokenize.
2223
*
24+
* @see Lexer.Defaults.ignorePattern
2325
* @author Nishant Aanjaney Jalan
2426
* @since 0.2.0
2527
*/
@@ -29,7 +31,7 @@ class LexerBuilder internal constructor() {
2931
}
3032

3133
/**
32-
* Build a lexer in a DSL type language.
34+
* Build a lexer in a DSL.
3335
* Accepts a function with [LexerBuilder] as a receiver and returns a lexer of that configuration.
3436
*
3537
* @param init the execution block in context of [LexerBuilder]

core/src/main/kotlin/io/github/cybercodernaj/parkour/lexer/internal/Lexer.kt

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,45 @@ package io.github.cybercodernaj.parkour.lexer.internal
22

33
import io.github.cybercodernaj.parkour.datasource.TextSource
44
import io.github.cybercodernaj.parkour.exceptions.LexicalException
5+
import io.github.cybercodernaj.parkour.lexer.LexerBuilder
56
import io.github.cybercodernaj.parkour.utils.Position
67

78
/**
9+
* # Lexer
10+
*
811
* The lexer is responsible for converting the given string into a stream of [Token]s.
9-
* The lexer take in multiple settings that configure how it behaves.
12+
* The lexer takes in multiple settings via the [LexerBuilder] that configure how it behaves.
1013
* It will perform lexical analysis on a line-by-line basis and return the next unconsumed token.
11-
* A newline character is **always** separates a token.
14+
* A newline character **always** separates tokens, except inside a multiline comment.
15+
*
16+
* ## Literals
17+
*
18+
* There are only three types of literals the lexer manages.
19+
* 1. Integer literals are normally lexed as a plain run of digits, which may contain underscores.
20+
* 2. Floating literals are normally lexed with a mandatory decimal point and an optional exponent.
21+
* 3. String literals are normally lexed verbatim until the lexer finds a match for the opening delimiter.
22+
*
23+
* Additionally, escape sequences are required to input special characters inside string literals.
1224
*
1325
* @constructor Creates a lexer with the provided properties.
14-
* @param ignorePattern characters that satisfy this regex would be skipped. (Default: "\s+")
26+
* @param ignorePattern characters that satisfy this regex would be skipped.
1527
* @param singleLineComments The regex that defines how a single-line comment starts.
1628
* Once identified, the lexer will skip the remaining line. (Default: null)
1729
* @param multilineComments A pair of regexes, the starting pattern and the ending pattern for a
1830
* multiline comment block. (Default: null)
19-
* @param identifiers A regex string that defines the rules for defining a name. (Default: "[a-zA-Z_]\w*")
31+
* @param identifiers A regex string that defines the rules for defining a name.
2032
* @param hardKeywords A set of strings that are considered hard keywords.
2133
* Hard keywords are characters and symbols that give a particular meaning to a program.
2234
* They may not be used as identifiers. (Default: [])
2335
* @param operators A set of strings that are considered as operators.
2436
* Operators are characters and symbols that may perform arithmetic or logical operations. (Default: [])
2537
* @param separators A set of strings that are considered as separators.
2638
* Separators are characters and symbols that act like delimiters to separate other meaningful elements. (Default: [])
27-
* @param literals The configuration of literals. Literals denote constant values
28-
* such as numbers, strings, and characters. (Default: see [Literals])
39+
* @param integerLiteral a regex that detects an integer literal.
40+
* @param floatingLiteral a regex that detects a floating point number literal.
41+
* @param singleLineString a set of strings, each of which both opens and closes a single-line string literal.
42+
* The lexer will throw a [LexicalException] when a string literal is not terminated in the same line.
43+
* @param escapeSequences a list of regexes, each matching an escape sequence and paired with a function that returns the Char for the matched string.
2944
*
3045
* @author Nishant Aanjaney Jalan
3146
* @since 0.1.0
@@ -38,9 +53,27 @@ class Lexer internal constructor(
3853
private val hardKeywords: Set<String> = emptySet(),
3954
private val operators: Set<String> = emptySet(),
4055
private val separators: Set<String> = emptySet(),
41-
private val literals: Literals = Literals()
56+
private val integerLiteral: Regex? = Defaults.integerLiterals,
57+
private val floatingLiteral: Regex? = Defaults.floatingLiterals,
58+
private val singleLineString: Set<String> = Defaults.singleLineString,
59+
private val escapeSequences: List<Pair<Regex, (String) -> Char>> = Defaults.escapeSequences,
4260
) {
61+
/**
62+
* A list of common patterns and lists of items that most programming languages and
63+
* data serialization formats use.
64+
*
65+
* @author Nishant Aanjaney Jalan
66+
* @since 0.2.0
67+
*/
4368
object Defaults {
69+
/**
70+
* ignorePattern is the pattern the lexer skips over.
71+
* The part of the string that matches this regex will be ignored.
72+
* This acts like a token separator.
73+
*
74+
* @author Nishant Aanjaney Jalan
75+
* @since 0.2.0
76+
*/
4477
val ignorePattern = Regex("""\s+""")
4578
val identifiers = Regex("""[a-zA-Z_]\w*""")
4679
val integerLiterals = Regex("""[-+]?[0-9_]+""")
@@ -164,7 +197,7 @@ class Lexer internal constructor(
164197
}
165198

166199
private fun tryLiterals(): Token.Literal? {
167-
(position pointsAt literals.floatingLiteral)
200+
(position pointsAt floatingLiteral)
168201
?.let { match ->
169202
if (match.value.isBlank())
170203
return null
@@ -177,7 +210,7 @@ class Lexer internal constructor(
177210
} ?: throw LexicalException("Double regex is badly formed.")
178211
}
179212

180-
(position pointsAt literals.integerLiteral)
213+
(position pointsAt integerLiteral)
181214
?.let { match ->
182215
if (match.value.isBlank())
183216
return null
@@ -190,14 +223,14 @@ class Lexer internal constructor(
190223
} ?: throw LexicalException("Int regex is badly formed. Tried parsing ${match.value} to an integer")
191224
}
192225

193-
val stringStart = position pointsAtSome literals.singleLineString
226+
val stringStart = position pointsAtSome singleLineString
194227
if (stringStart != null) {
195228
val stringLit = StringBuilder().append(currentLine[position.col])
196229
val start = position++
197230
if (position.col >= currentLine.length)
198231
throw LexicalException("String not closed in the given line")
199232
while (currentLine[position.col].toString() != stringStart) {
200-
val matches = literals.escapeSequences.mapNotNull { (regex, getEscapeChar) ->
233+
val matches = escapeSequences.mapNotNull { (regex, getEscapeChar) ->
201234
val result = (position pointsAt regex) ?: return@mapNotNull null
202235
result.value to getEscapeChar(result.value)
203236
}

core/src/main/kotlin/io/github/cybercodernaj/parkour/lexer/internal/Literals.kt

Lines changed: 0 additions & 29 deletions
This file was deleted.

core/src/test/kotlin/io/github/cybercodernaj/parkour/lexer/internal/LexerLiteralTest.kt

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,10 @@ class LexerLiteralTest {
1212
private val lexer = Lexer(
1313
hardKeywords = setOf("char"),
1414
operators = setOf("="),
15-
literals = Literals(
16-
escapeSequences = listOf(
17-
Regex("""\\f""") to { _ -> 'f' },
18-
Regex("""\\n""") to { _ -> '\n' },
19-
Regex("""\\u\d{4}""") to { unicode -> unicode.substring(2).toInt(16).toChar() },
20-
)
15+
escapeSequences = listOf(
16+
Regex("""\\f""") to { _ -> 'f' },
17+
Regex("""\\n""") to { _ -> '\n' },
18+
Regex("""\\u\d{4}""") to { unicode -> unicode.substring(2).toInt(16).toChar() },
2119
)
2220
)
2321

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
<!doctype html>
2+
<html class="no-js">
3+
<head>
4+
<meta name="viewport" content="width=device-width, initial-scale=1" charset="UTF-8">
5+
<title>escapeSequences</title>
6+
<link href="../../../../images/logo-icon.svg" rel="icon" type="image/svg"> <script>var pathToRoot = "../../../../";</script>
7+
<script>document.documentElement.classList.replace("no-js","js");</script>
8+
<script>const storage = localStorage.getItem("dokka-dark-mode")
9+
if (storage == null) {
10+
const osDarkSchemePreferred = window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches
11+
if (osDarkSchemePreferred === true) {
12+
document.getElementsByTagName("html")[0].classList.add("theme-dark")
13+
}
14+
} else {
15+
const savedDarkMode = JSON.parse(storage)
16+
if(savedDarkMode === true) {
17+
document.getElementsByTagName("html")[0].classList.add("theme-dark")
18+
}
19+
}
20+
</script>
21+
<script type="text/javascript" src="https://unpkg.com/kotlin-playground@1/dist/playground.min.js" async></script>
22+
<script type="text/javascript" src="../../../../scripts/sourceset_dependencies.js" async></script>
23+
<link href="../../../../styles/style.css" rel="Stylesheet">
24+
<link href="../../../../styles/main.css" rel="Stylesheet">
25+
<link href="../../../../styles/prism.css" rel="Stylesheet">
26+
<link href="../../../../styles/logo-styles.css" rel="Stylesheet">
27+
<link href="../../../../styles/font-jb-sans-auto.css" rel="Stylesheet">
28+
<script type="text/javascript" src="../../../../scripts/clipboard.js" async></script>
29+
<script type="text/javascript" src="../../../../scripts/navigation-loader.js" async></script>
30+
<script type="text/javascript" src="../../../../scripts/platform-content-handler.js" async></script>
31+
<script type="text/javascript" src="../../../../scripts/main.js" defer></script>
32+
<script type="text/javascript" src="../../../../scripts/prism.js" async></script>
33+
<script type="text/javascript" src="../../../../scripts/symbol-parameters-wrapper_deferred.js" defer></script></head>
34+
<body>
35+
<div class="root">
36+
<nav class="navigation" id="navigation-wrapper">
37+
<div class="navigation--inner">
38+
<div class="navigation-title">
39+
<button class="menu-toggle" id="menu-toggle" type="button">toggle menu</button>
40+
<div class="library-name">
41+
<a class="library-name--link" href="../../../../index.html">
42+
Parkour
43+
</a> </div>
44+
<div class="library-version">0.1.0
45+
</div>
46+
</div>
47+
<div class="filter-section" id="filter-section">
48+
<button class="platform-tag platform-selector jvm-like" data-active="" data-filter=":core:dokkaHtmlPartial/main">jvm</button>
49+
</div>
50+
</div>
51+
<div class="navigation-controls">
52+
<button class="navigation-controls--btn navigation-controls--theme" id="theme-toggle-button" type="button">switch theme</button>
53+
<div class="navigation-controls--btn navigation-controls--search" id="searchBar" role="button">search in API</div>
54+
</div>
55+
</nav>
56+
<div id="container">
57+
<div class="sidebar" id="leftColumn">
58+
<div class="sidebar--inner" id="sideMenu"></div>
59+
</div>
60+
<div id="main">
61+
<div class="main-content" data-page-type="member" id="content" pageids="parkour::io.github.cybercodernaj.parkour.lexer.internal/Lexer.Defaults/escapeSequences/#/PointingToDeclaration//537371977">
62+
<div class="breadcrumbs"><a href="../../../index.html">parkour</a><span class="delimiter">/</span><a href="../../index.html">io.github.cybercodernaj.parkour.lexer.internal</a><span class="delimiter">/</span><a href="../index.html">Lexer</a><span class="delimiter">/</span><a href="index.html">Defaults</a><span class="delimiter">/</span><span class="current">escapeSequences</span></div>
63+
<div class="cover ">
64+
<h1 class="cover"><span>escape</span><wbr><span><span>Sequences</span></span></h1>
65+
</div>
66+
<div class="platform-hinted " data-platform-hinted="data-platform-hinted"><div class="content sourceset-dependent-content" data-active="" data-togglable=":core:dokkaHtmlPartial/main"><div class="symbol monospace"><span class="token keyword">val </span><a href="escape-sequences.html">escapeSequences</a><span class="token operator">: </span><a href="https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-list/index.html">List</a><span class="token operator">&lt;</span><a href="https://kotlinlang.org/api/latest/jvm/stdlib/kotlin/-pair/index.html">Pair</a><span class="token operator">&lt;</span><a href="https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.text/-regex/index.html">Regex</a><span class="token punctuation">, </span><span class="token punctuation">(</span><a href="https://kotlinlang.org/api/latest/jvm/stdlib/kotlin/-string/index.html">String</a><span class="token punctuation">)</span><span class="token operator"> -&gt; </span><a href="https://kotlinlang.org/api/latest/jvm/stdlib/kotlin/-char/index.html">Char</a><span class="token operator">&gt;</span><span class="token operator">&gt;</span><span class="clearfix"><span class="floating-right">(<a href="https://github.com/cybercoder-naj/parkour/tree/main/core/src/main/kotlin/io/github/cybercodernaj/parkour/lexer/internal/Lexer.kt#L75">source</a>)</span></span></div></div></div>
67+
</div>
68+
<div class="footer">
69+
<span class="go-to-top-icon"><a href="#content" id="go-to-top-link"></a></span><span>© 2024 Copyright</span><span class="pull-right"><span>Generated by </span><a href="https://github.com/Kotlin/dokka"><span>dokka</span><span class="padded-icon"></span></a></span>
70+
</div>
71+
</div>
72+
</div>
73+
</div>
74+
</body>
75+
</html>
76+

0 commit comments

Comments
 (0)