Skip to content

Commit ac956fe

Browse files
committed
minor change to text value secondary parsing, hoping to speed it up a bit
1 parent 32e4e91 commit ac956fe

File tree

3 files changed

+204
-11
lines changed

3 files changed

+204
-11
lines changed

src/main/java/com/fasterxml/jackson/core/io/CharTypes.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public final class CharTypes
2121
*/
2222
final static int[] sInputCodes;
2323
static {
24-
/* 96 would do for most cases (backslash is ascii 94)
24+
/* 96 would do for most cases (backslash is ASCII 92)
2525
* but if we want to do lookups by raw bytes it's better
2626
* to have full table
2727
*/

src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1504,6 +1504,8 @@ protected void _finishString2() throws IOException
15041504
{
15051505
char[] outBuf = _textBuffer.getCurrentSegment();
15061506
int outPtr = _textBuffer.getCurrentSegmentSize();
1507+
final int[] codes = _icLatin1;
1508+
final int maxCode = codes.length;
15071509

15081510
while (true) {
15091511
if (_inputPtr >= _inputEnd) {
@@ -1513,21 +1515,18 @@ protected void _finishString2() throws IOException
15131515
}
15141516
char c = _inputBuffer[_inputPtr++];
15151517
int i = (int) c;
1516-
if (i <= INT_BACKSLASH) {
1517-
if (i == INT_BACKSLASH) {
1518+
if (i < maxCode && codes[i] != 0) {
1519+
if (i == INT_QUOTE) {
1520+
break;
1521+
} else if (i == INT_BACKSLASH) {
15181522
/* Although chars outside of BMP are to be escaped as
15191523
* an UTF-16 surrogate pair, does that affect decoding?
15201524
* For now let's assume it does not.
15211525
*/
15221526
c = _decodeEscaped();
1523-
} else if (i <= INT_QUOTE) {
1524-
if (i == INT_QUOTE) {
1525-
break;
1526-
}
1527-
if (i < INT_SPACE) {
1528-
_throwUnquotedSpace(i, "string value");
1529-
}
1530-
}
1527+
} else if (i < INT_SPACE) {
1528+
_throwUnquotedSpace(i, "string value");
1529+
} // anything else?
15311530
}
15321531
// Need more room?
15331532
if (outPtr >= outBuf.length) {
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
package perf;
2+
3+
/**
 * Manual micro-benchmark that compares three ways of classifying input
 * characters while copying them from one {@code char[]} to another:
 * <ul>
 *  <li>"classic": range check against 256 followed by a table lookup</li>
 *  <li>{@code byte[]}: direct lookup in a 64k-entry byte table</li>
 *  <li>{@code int[]}: direct lookup in a 64k-entry int table</li>
 * </ul>
 * All three variants apply the same rules, so for the same input they
 * produce the same output (and the same reported hash); only the lookup
 * strategy differs.
 * <p>
 * Run via {@link #main}; the benchmark loops until the process is killed.
 */
public class ManualCharAccessTest
{
    /**
     * Checksum of the output produced by the most recently executed variant.
     * Printed with each result so that the variants can be cross-checked:
     * they should all report the same value.
     */
    protected int hash;

    // Lookup tables: 0 means "plain character, copy as-is"; non-zero marks a
    // character that needs special handling (1 = control char, 2 = backslash).

    protected final static byte[] SMALL_BYTE_CODES = new byte[256];

    protected final static int[] SMALL_INT_CODES = new int[256];

    protected final static int[] INT_CODES = new int[0x10000];
    protected final static byte[] BYTE_CODES = new byte[0x10000];

    final static byte NULL_BYTE = (byte) 0;

    static {
        // Control characters other than CR, LF and TAB are "special"
        for (int i = 0; i < 32; ++i) {
            if (!(i == '\r' || i == '\n' || i == '\t')) {
                INT_CODES[i] = 1;
                BYTE_CODES[i] = 1;
                SMALL_BYTE_CODES[i] = 1;
                SMALL_INT_CODES[i] = 1;
            }
        }
        // ... as is the backslash
        INT_CODES['\\'] = 2;
        BYTE_CODES['\\'] = 2;
        SMALL_BYTE_CODES['\\'] = 2;
        SMALL_INT_CODES['\\'] = 2;
    }

    /**
     * Builds the benchmark input: repeated lines of text, with a backslash
     * appended after every 31st line.
     *
     * @param len Minimum length of the String to generate
     *
     * @return Generated String, at least {@code len} characters long
     */
    protected String generateString(int len)
    {
        int counter = 0;
        StringBuilder sb = new StringBuilder(len + 20);
        do {
            sb.append("Some stuff: ").append(len).append("\n");
            if ((++counter % 31) == 0) {
                sb.append("\\");
            }
        } while (sb.length() < len);
        return sb.toString();
    }

    /**
     * Main benchmark loop: cycles through the three variants forever,
     * printing the time of each run and, periodically, running averages
     * (runs from the warm-up phase are excluded from the averages).
     */
    private void test() throws Exception
    {
        final String INPUT_STR = generateString(23000);
        final char[] INPUT_CHARS = INPUT_STR.toCharArray();
        final char[] OUTPUT = new char[INPUT_CHARS.length];

        // Let's try to guesstimate suitable size, N megs of output
        final int REPS = (int) ((double) (80 * 1000 * 1000) / (double) INPUT_CHARS.length);
        System.out.printf("%d bytes to scan, will do %d repetitions\n",
                INPUT_CHARS.length, REPS);

        int i = 0;
        int roundsDone = 0;
        final int TYPES = 3;
        final int WARMUP_ROUNDS = 5;

        final long[] times = new long[TYPES];

        while (true) {
            int round = (i++ % TYPES);

            String msg;
            // first variant of each cycle also handles per-cycle reporting
            boolean lf = (round == 0);

            long msecs;

            switch (round) {
            case 0:
                msg = "Read classic";
                msecs = readClassic(REPS, INPUT_CHARS, OUTPUT);
                break;
            case 1:
                msg = "Read, byte[]";
                msecs = readWithByte(REPS, INPUT_CHARS, OUTPUT);
                break;
            case 2:
                msg = "Read, int[]";
                msecs = readWithInt(REPS, INPUT_CHARS, OUTPUT);
                break;
            default:
                // can not happen: round is always in [0, TYPES)
                throw new IllegalStateException("Unexpected round: " + round);
            }
            // skip first 5 rounds to let results stabilize
            if (roundsDone >= WARMUP_ROUNDS) {
                times[round] += msecs;
            }

            // hash is an int: print it as actual hex digits after the "0x" prefix
            System.out.printf("Test '%s' [hash: 0x%x] -> %d msecs\n", msg, this.hash, msecs);
            if (lf) {
                ++roundsDone;
                if ((roundsDone % 7) == 0 && roundsDone > WARMUP_ROUNDS) {
                    double den = (double) (roundsDone - WARMUP_ROUNDS);
                    System.out.printf("Averages after %d rounds (classic, byte[], int[]): "
                            +"%.1f / %.1f / %.1f msecs\n",
                            (int) den
                            ,times[0] / den, times[1] / den, times[2] / den
                            );

                }
                System.out.println();
            }
            if ((i % 17) == 0) {
                System.out.println("[GC]");
                Thread.sleep(100L);
                System.gc();
                Thread.sleep(100L);
            }
        }
    }

    /**
     * Simple checksum over the first {@code len} characters of {@code buf};
     * computed outside of the timed section.
     */
    private static int checksum(char[] buf, int len)
    {
        int h = 0;
        for (int i = 0; i < len; ++i) {
            h = (31 * h) + buf[i];
        }
        return h;
    }

    /**
     * "Classic" variant: characters at or above 256 are never special, so a
     * range check is done before consulting the table.
     *
     * @param REPS Number of times to process the whole input
     * @param input Characters to scan
     * @param output Buffer to copy into; must be at least as long as {@code input}
     *
     * @return Elapsed time in milliseconds
     */
    private long readClassic(int REPS, char[] input, char[] output) throws Exception
    {
        // nanoTime() is the monotonic clock meant for measuring elapsed time
        long start = System.nanoTime();
        final byte[] codes = BYTE_CODES;
        final int MAX = 256;
        int outPtr = 0;

        while (--REPS >= 0) {
            outPtr = 0;
            for (int i = 0, end = input.length; i < end; ++i) {
                int ch = input[i];
                // Plain character: either outside the range in which special
                // characters exist, or marked as plain in the table. (Characters
                // >= MAX must be copied, same as in the other variants.)
                if (ch >= MAX || codes[ch] == NULL_BYTE) {
                    output[outPtr++] = (char) ch;
                    continue;
                }
                if (ch == '\\') {
                    output[outPtr++] = '_';
                } else if (ch == '\n') {
                    // not reached with current tables ('\n' is marked plain)
                    output[outPtr++] = '_';
                }
                // other special characters are dropped
            }
        }
        long time = (System.nanoTime() - start) / 1000000L;
        this.hash = checksum(output, outPtr);
        return time;
    }

    /**
     * Variant that indexes the full 64k {@code byte[]} table directly,
     * without a range check.
     *
     * @param REPS Number of times to process the whole input
     * @param input Characters to scan
     * @param output Buffer to copy into; must be at least as long as {@code input}
     *
     * @return Elapsed time in milliseconds
     */
    private long readWithByte(int REPS, char[] input, char[] output) throws Exception
    {
        long start = System.nanoTime();
        final byte[] codes = BYTE_CODES;
        int outPtr = 0;

        while (--REPS >= 0) {
            outPtr = 0;
            for (int i = 0, end = input.length; i < end; ++i) {
                char ch = input[i];
                if (codes[ch] == NULL_BYTE) {
                    output[outPtr++] = ch;
                    continue;
                }
                if (ch == '\\') {
                    output[outPtr++] = '_';
                } else if (ch == '\n') {
                    // not reached with current tables ('\n' is marked plain)
                    output[outPtr++] = '_';
                }
                // other special characters are dropped
            }
        }
        long time = (System.nanoTime() - start) / 1000000L;
        this.hash = checksum(output, outPtr);
        return time;
    }

    /**
     * Variant that indexes the full 64k {@code int[]} table directly,
     * without a range check.
     *
     * @param REPS Number of times to process the whole input
     * @param input Characters to scan
     * @param output Buffer to copy into; must be at least as long as {@code input}
     *
     * @return Elapsed time in milliseconds
     */
    private long readWithInt(int REPS, char[] input, char[] output) throws Exception
    {
        long start = System.nanoTime();
        final int[] codes = INT_CODES;
        int outPtr = 0;

        while (--REPS >= 0) {
            outPtr = 0;
            for (int i = 0, end = input.length; i < end; ++i) {
                char ch = input[i];
                // zero means "plain character", exactly as in the byte[] variant
                if (codes[ch] == 0) {
                    output[outPtr++] = ch;
                    continue;
                }
                if (ch == '\\') {
                    output[outPtr++] = '_';
                } else if (ch == '\n') {
                    // not reached with current tables ('\n' is marked plain)
                    output[outPtr++] = '_';
                }
                // other special characters are dropped
            }
        }
        long time = (System.nanoTime() - start) / 1000000L;
        this.hash = checksum(output, outPtr);
        return time;
    }

    /**
     * Entry point; takes no arguments.
     */
    public static void main(String[] args) throws Exception
    {
        if (args.length != 0) {
            System.err.println("Usage: java ...");
            System.exit(1);
        }
        new ManualCharAccessTest().test();
    }
}

0 commit comments

Comments
 (0)