Skip to content

Commit dd79854

Browse files
committed
fixes #192 - utf-16 handling for stream-based constructor
1 parent d76cf1c commit dd79854

File tree

3 files changed

+71
-18
lines changed

3 files changed

+71
-18
lines changed

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,9 @@ if(RAPIDCSV_BUILD_TESTS)
184184
add_unit_test(test098)
185185
add_unit_test(test099)
186186
add_unit_test(test100)
187+
if(HAS_CODECVT)
188+
add_unit_test(test101)
189+
endif()
187190

188191
# perf tests
189192
add_perf_test(ptest001)

src/rapidcsv.h

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* rapidcsv.h
33
*
44
* URL: https://github.com/d99kris/rapidcsv
5-
* Version: 8.87
5+
* Version: 8.88
66
*
77
* Copyright (C) 2017-2025 Kristofer Berggren
88
* All rights reserved.
@@ -1532,24 +1532,28 @@ namespace rapidcsv
15321532
mIsUtf16 = true;
15331533
mIsLE = (bom2b == bomU16le);
15341534

1535-
std::wifstream wstream;
1536-
wstream.exceptions(std::wifstream::failbit | std::wifstream::badbit);
1537-
wstream.open(mPath, std::ios::binary);
1538-
if (mIsLE)
1539-
{
1540-
wstream.imbue(std::locale(wstream.getloc(),
1541-
new std::codecvt_utf16<wchar_t, 0x10ffff,
1542-
static_cast<std::codecvt_mode>(std::consume_header |
1543-
std::little_endian)>));
1544-
}
1545-
else
1535+
std::vector<char> buffer(static_cast<size_t>(length));
1536+
pStream.read(buffer.data(), length);
1537+
1538+
const std::wstring& utf16 = [&]()
15461539
{
1547-
wstream.imbue(std::locale(wstream.getloc(),
1548-
new std::codecvt_utf16<wchar_t, 0x10ffff,
1549-
std::consume_header>));
1550-
}
1551-
std::wstringstream wss;
1552-
wss << wstream.rdbuf();
1540+
if (mIsLE)
1541+
{
1542+
const std::codecvt_mode mode =
1543+
static_cast<std::codecvt_mode>(std::consume_header | std::little_endian);
1544+
std::wstring_convert<std::codecvt_utf16<wchar_t, 0x10ffff, mode>> utf16conv;
1545+
return utf16conv.from_bytes(buffer.data(), buffer.data() + length);
1546+
}
1547+
else
1548+
{
1549+
const std::codecvt_mode mode =
1550+
static_cast<std::codecvt_mode>(std::consume_header);
1551+
std::wstring_convert<std::codecvt_utf16<wchar_t, 0x10ffff, mode>> utf16conv;
1552+
return utf16conv.from_bytes(buffer.data(), buffer.data() + length);
1553+
}
1554+
}();
1555+
1556+
std::wstringstream wss(utf16);
15531557
std::string utf8 = ToString(wss.str());
15541558
std::stringstream ss(utf8);
15551559
ParseCsv(ss, static_cast<std::streamsize>(utf8.size()));

tests/test101.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// test101.cpp - read UTF-16 LE stream
2+
3+
#include <rapidcsv.h>
4+
#include "unittest.h"
5+
6+
int main() // Test driver: parses a BOM-prefixed UTF-16 LE CSV through a std::istream and checks cell values; returns 0 on success, 1 if an exception was caught.
7+
{
8+
int rv = 0; // process exit code; only changed by the catch handler below
9+
10+
const unsigned char u16le[] = // raw input bytes: two bytes per character, low byte first
11+
{
12+
0xff, 0xfe, // UTF-16 little-endian byte-order mark
13+
0x2d, 0x00, 0x2c, 0x00, 0x41, 0x00, 0x2c, 0x00, 0x42, 0x00, 0x2c, 0x00,
14+
0x43, 0x00, 0x0a, 0x00, 0x31, 0x00, 0x2c, 0x00, 0x33, 0x00, 0x2c, 0x00,
15+
0x39, 0x00, 0x2c, 0x00, 0x38, 0x00, 0x31, 0x00, 0x0a, 0x00, 0x32, 0x00,
16+
0x2c, 0x00, 0x34, 0x00, 0x2c, 0x00, 0x31, 0x00, 0x36, 0x00, 0x2c, 0x00,
17+
0x32, 0x00, 0x35, 0x00, 0x36, 0x00, 0x0a, 0x00
18+
};
19+
const unsigned int u16le_len = 58; // 2 BOM bytes + 56 payload bytes; hard-coded, so it must be kept in sync with the array above
20+
21+
std::string csv(reinterpret_cast<const char*>(u16le), u16le_len); // explicit length so the embedded 0x00 bytes are copied too; decoded text is listed below
22+
// "-,A,B,C\n"
23+
// "1,3,9,81\n"
24+
// "2,4,16,256\n"
25+
26+
try
27+
{
28+
// stream from string
29+
std::istringstream sstream(csv); // narrow-char stream carrying the UTF-16 bytes unchanged
30+
rapidcsv::Document doc(sstream, rapidcsv::LabelParams(0, 0)); // presumably row 0 holds column labels and column 0 holds row labels — confirm against LabelParams
31+
unittest::ExpectEqual(int, doc.GetCell<int>(0, 0), 3); // index-based lookups; NOTE(review): presumably ExpectEqual throws on mismatch so the catch below fails the test — verify in unittest.h
32+
unittest::ExpectEqual(int, doc.GetCell<int>(1, 0), 9);
33+
unittest::ExpectEqual(int, doc.GetCell<int>(2, 0), 81);
34+
35+
unittest::ExpectEqual(std::string, doc.GetCell<std::string>("A", "2"), "4"); // label-based lookups: column name, then row name
36+
unittest::ExpectEqual(std::string, doc.GetCell<std::string>("B", "2"), "16");
37+
unittest::ExpectEqual(std::string, doc.GetCell<std::string>("C", "2"), "256");
38+
}
39+
catch (const std::exception& ex)
40+
{
41+
std::cout << ex.what() << std::endl; // print the failure reason
42+
rv = 1; // any std::exception marks the test as failed
43+
}
44+
45+
return rv;
46+
}

0 commit comments

Comments
 (0)