Skip to content

Commit 3999818

Browse files
authored
Correctly handle buffer boundaries while decoding escape sequences from json stream (#1706)
Fixes #1702
1 parent: e721ebe; commit: 3999818

File tree

6 files changed: 77 additions and 41 deletions

formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt

Lines changed: 20 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -140,7 +140,7 @@ internal abstract class AbstractJsonLexer {
140140
open fun ensureHaveChars() {}
141141

142142
// Used as bound check in loops
143-
abstract fun definitelyNotEof(position: Int): Int
143+
abstract fun prefetchOrEof(position: Int): Int
144144

145145
abstract fun tryConsumeComma(): Boolean
146146

@@ -182,7 +182,7 @@ internal abstract class AbstractJsonLexer {
182182
val source = source
183183
var cpos = currentPosition
184184
while (true) {
185-
cpos = definitelyNotEof(cpos)
185+
cpos = prefetchOrEof(cpos)
186186
if (cpos == -1) break // could be inline function but KT-1436
187187
val c = source[cpos++]
188188
if (c == ' ' || c == '\n' || c == '\r' || c == '\t') continue
@@ -223,7 +223,7 @@ internal abstract class AbstractJsonLexer {
223223
val source = source
224224
var cpos = currentPosition
225225
while (true) {
226-
cpos = definitelyNotEof(cpos)
226+
cpos = prefetchOrEof(cpos)
227227
if (cpos == -1) break
228228
val ch = source[cpos]
229229
if (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t') {
@@ -244,7 +244,7 @@ internal abstract class AbstractJsonLexer {
244244
*/
245245
fun tryConsumeNotNull(): Boolean {
246246
var current = skipWhitespaces()
247-
current = definitelyNotEof(current)
247+
current = prefetchOrEof(current)
248248
// Cannot consume null due to EOF, maybe something else
249249
val len = source.length - current
250250
if (len < 4 || current == -1) return true
@@ -264,7 +264,7 @@ internal abstract class AbstractJsonLexer {
264264
var current = currentPosition
265265
// Skip whitespaces
266266
while (true) {
267-
current = definitelyNotEof(current)
267+
current = prefetchOrEof(current)
268268
if (current == -1) break
269269
val c = source[current]
270270
// Faster than char2TokenClass actually
@@ -317,13 +317,15 @@ internal abstract class AbstractJsonLexer {
317317
while (char != STRING) {
318318
if (char == STRING_ESC) {
319319
usedAppend = true
320-
currentPosition = appendEscape(lastPosition, currentPosition)
320+
currentPosition = prefetchOrEof(appendEscape(lastPosition, currentPosition))
321+
if (currentPosition == -1)
322+
fail("EOF", currentPosition)
321323
lastPosition = currentPosition
322324
} else if (++currentPosition >= source.length) {
323325
usedAppend = true
324326
// end of chunk
325327
appendRange(lastPosition, currentPosition)
326-
currentPosition = definitelyNotEof(currentPosition)
328+
currentPosition = prefetchOrEof(currentPosition)
327329
if (currentPosition == -1)
328330
fail("EOF", currentPosition)
329331
lastPosition = currentPosition
@@ -395,7 +397,7 @@ internal abstract class AbstractJsonLexer {
395397
if (current >= source.length) {
396398
usedAppend = true
397399
appendRange(currentPosition, current)
398-
val eof = definitelyNotEof(current)
400+
val eof = prefetchOrEof(current)
399401
if (eof == -1) {
400402
// to handle plain lenient strings, such as top-level
401403
currentPosition = current
@@ -421,7 +423,7 @@ internal abstract class AbstractJsonLexer {
421423

422424
private fun appendEsc(startPosition: Int): Int {
423425
var currentPosition = startPosition
424-
currentPosition = definitelyNotEof(currentPosition)
426+
currentPosition = prefetchOrEof(currentPosition)
425427
if (currentPosition == -1) fail("Expected escape sequence to continue, got EOF")
426428
val currentChar = source[currentPosition++]
427429
if (currentChar == UNICODE_ESC) {
@@ -435,7 +437,13 @@ internal abstract class AbstractJsonLexer {
435437
}
436438

437439
private fun appendHex(source: CharSequence, startPos: Int): Int {
438-
if (startPos + 4 >= source.length) fail("Unexpected EOF during unicode escape")
440+
if (startPos + 4 >= source.length) {
441+
currentPosition = startPos
442+
ensureHaveChars()
443+
if (currentPosition + 4 >= source.length)
444+
fail("Unexpected EOF during unicode escape")
445+
return appendHex(source, currentPosition)
446+
}
439447
escapedString.append(
440448
((fromHexChar(source, startPos) shl 12) +
441449
(fromHexChar(source, startPos + 1) shl 8) +
@@ -520,7 +528,7 @@ internal abstract class AbstractJsonLexer {
520528
* that doesn't allocate and also doesn't support any radix but 10
521529
*/
522530
var current = skipWhitespaces()
523-
current = definitelyNotEof(current)
531+
current = prefetchOrEof(current)
524532
if (current >= source.length || current == -1) fail("EOF")
525533
val hasQuotation = if (source[current] == STRING) {
526534
// Check it again
@@ -598,7 +606,7 @@ internal abstract class AbstractJsonLexer {
598606
* in 6-th bit and we leverage this fact, our implementation consumes boolean literals
599607
* in a case-insensitive manner.
600608
*/
601-
var current = definitelyNotEof(start)
609+
var current = prefetchOrEof(start)
602610
if (current >= source.length || current == -1) fail("EOF")
603611
return when (source[current++].code or asciiCaseMask) {
604612
't'.code -> {

formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,12 @@
1+
/*
2+
* Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
3+
*/
4+
15
package kotlinx.serialization.json.internal
26

37
internal class StringJsonLexer(override val source: String) : AbstractJsonLexer() {
48

5-
override fun definitelyNotEof(position: Int): Int = if (position < source.length) position else -1
9+
override fun prefetchOrEof(position: Int): Int = if (position < source.length) position else -1
610

711
override fun consumeNextToken(): Byte {
812
val source = source

formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt

Lines changed: 5 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,11 @@
1+
/*
2+
* Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
3+
*/
4+
15
package kotlinx.serialization.json
26

37
import kotlinx.serialization.*
48
import kotlinx.serialization.builtins.*
5-
import kotlinx.serialization.json.internal.*
69
import kotlinx.serialization.test.*
710
import kotlin.random.*
811
import kotlin.test.*
@@ -59,7 +62,7 @@ class JsonUnicodeTest : JsonTestBase() {
5962
@Test
6063
fun testRandomEscapeSequences() = noJs { // Too slow on JS
6164
repeat(10_000) {
62-
val s = generateRandomString()
65+
val s = generateRandomUnicodeString(Random.nextInt(1, 2047))
6366
try {
6467
assertSerializedAndRestored(s, String.serializer())
6568
} catch (e: Throwable) {
@@ -68,21 +71,4 @@ class JsonUnicodeTest : JsonTestBase() {
6871
}
6972
}
7073
}
71-
72-
private fun generateRandomString(): String {
73-
val size = Random.nextInt(1, 2047)
74-
return buildString(size) {
75-
repeat(size) {
76-
val pickEscape = Random.nextBoolean()
77-
if (pickEscape) {
78-
// Definitely escape symbol
79-
// null can be appended as well, completely okay
80-
append(ESCAPE_STRINGS.random())
81-
} else {
82-
// Any symbol, including escaping one
83-
append(Char(Random.nextInt(Char.MIN_VALUE.code..Char.MAX_VALUE.code)))
84-
}
85-
}
86-
}
87-
}
8874
}

formats/json/commonTest/src/kotlinx/serialization/test/TestHelpers.kt

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,13 @@
11
/*
2-
* Copyright 2017-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
2+
* Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
33
*/
44
package kotlinx.serialization.test
55

66
import kotlinx.serialization.*
77
import kotlinx.serialization.descriptors.*
8+
import kotlinx.serialization.json.internal.ESCAPE_STRINGS
9+
import kotlin.random.Random
10+
import kotlin.random.nextInt
811
import kotlin.test.*
912

1013
fun SerialDescriptor.assertDescriptorEqualsTo(other: SerialDescriptor) {
@@ -40,3 +43,18 @@ inline fun assertFailsWithMissingField(block: () -> Unit) {
4043
val e = assertFailsWith<SerializationException>(block = block)
4144
assertTrue(e.message?.contains("but it was missing") ?: false)
4245
}
46+
47+
fun generateRandomUnicodeString(size: Int): String {
48+
return buildString(size) {
49+
repeat(size) {
50+
val pickEscape = Random.nextBoolean()
51+
if (pickEscape) {
52+
// Definitely an escape symbol
53+
append(ESCAPE_STRINGS.random().takeIf { it != null } ?: 'N')
54+
} else {
55+
// Any symbol, including escaping one
56+
append(Char(Random.nextInt(Char.MIN_VALUE.code..Char.MAX_VALUE.code)).takeIf { it.isDefined() && !it.isSurrogate()} ?: 'U')
57+
}
58+
}
59+
}
60+
}

formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonLexerJvm.kt

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ internal class ReaderJsonLexer(
5858
ensureHaveChars()
5959
var current = currentPosition
6060
while (true) {
61-
current = definitelyNotEof(current)
61+
current = prefetchOrEof(current)
6262
if (current == -1) break // could be inline function but KT-1436
6363
val c = source[current]
6464
// Inlined skipWhitespaces without field spill and nested loop. Also faster than char2TokenClass
@@ -93,7 +93,7 @@ internal class ReaderJsonLexer(
9393
currentPosition = 0
9494
}
9595

96-
override fun definitelyNotEof(position: Int): Int {
96+
override fun prefetchOrEof(position: Int): Int {
9797
if (position < source.length) return position
9898
currentPosition = position
9999
ensureHaveChars()
@@ -106,7 +106,7 @@ internal class ReaderJsonLexer(
106106
val source = source
107107
var cpos = currentPosition
108108
while (true) {
109-
cpos = definitelyNotEof(cpos)
109+
cpos = prefetchOrEof(cpos)
110110
if (cpos == -1) break
111111
val ch = source[cpos++]
112112
return when (val tc = charToTokenClass(ch)) {
@@ -141,7 +141,7 @@ internal class ReaderJsonLexer(
141141
var current = currentPosition
142142
val closingQuote = indexOf('"', current)
143143
if (closingQuote == -1) {
144-
current = definitelyNotEof(current)
144+
current = prefetchOrEof(current)
145145
if (current == -1) fail(TC_STRING)
146146
// it's also possible just to resize buffer,
147147
// instead of falling back to slow path,

formats/json/jvmTest/src/kotlinx/serialization/features/JsonJvmStreamsTest.kt

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,13 +4,15 @@
44

55
package kotlinx.serialization.features
66

7-
import kotlinx.serialization.*
7+
import kotlinx.serialization.SerializationException
8+
import kotlinx.serialization.StringData
89
import kotlinx.serialization.builtins.serializer
9-
import kotlinx.serialization.json.Json
10+
import kotlinx.serialization.json.*
1011
import kotlinx.serialization.json.internal.BATCH_SIZE
11-
import kotlinx.serialization.test.decodeViaStream
12-
import kotlinx.serialization.test.encodeViaStream
12+
import kotlinx.serialization.test.*
1313
import org.junit.Test
14+
import java.io.ByteArrayInputStream
15+
import java.io.ByteArrayOutputStream
1416
import kotlin.test.assertEquals
1517
import kotlin.test.assertFailsWith
1618

@@ -65,4 +67,22 @@ class JsonJvmStreamsTest {
6567
Json.decodeViaStream(String.serializer(), "\"")
6668
}
6769
}
70+
71+
@Test
72+
fun testRandomEscapeSequences() {
73+
repeat(1000) {
74+
val s = generateRandomUnicodeString(strLen)
75+
try {
76+
val serializer = String.serializer()
77+
val b = ByteArrayOutputStream()
78+
Json.encodeToStream(serializer, s, b)
79+
val restored = Json.decodeFromStream(serializer, ByteArrayInputStream(b.toByteArray()))
80+
assertEquals(s, restored)
81+
} catch (e: Throwable) {
82+
// Not assertion error to preserve cause
83+
throw IllegalStateException("Unexpectedly failed test, cause string: $s", e)
84+
}
85+
}
86+
}
87+
6888
}

0 commit comments

Comments
 (0)