Skip to content

Commit 964360c

Browse files
committed
jdbc duckdb tests
1 parent eebc009 commit 964360c

File tree

3 files changed

+249
-2
lines changed

3 files changed

+249
-2
lines changed

dataframe-jdbc/build.gradle.kts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ dependencies {
2626
testImplementation(libs.junit)
2727
testImplementation(libs.sl4jsimple)
2828
testImplementation(libs.jts)
29+
30+
testImplementation(libs.duckdb.jdbc)
31+
testImplementation(libs.embedded.postgresql)
32+
2933
testImplementation(libs.kotestAssertions) {
3034
exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8")
3135
}
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
package org.jetbrains.kotlinx.dataframe.io
2+
3+
import io.zonky.test.db.postgres.junit.EmbeddedPostgresRules
4+
import io.zonky.test.db.postgres.junit.SingleInstancePostgresRule
5+
import org.intellij.lang.annotations.Language
6+
import org.jetbrains.kotlinx.dataframe.AnyFrame
7+
import org.jetbrains.kotlinx.dataframe.DataFrame
8+
import org.jetbrains.kotlinx.dataframe.api.mapToFrame
9+
import org.jetbrains.kotlinx.dataframe.api.print
10+
import org.jetbrains.kotlinx.dataframe.io.db.DbType
11+
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
12+
import org.junit.Rule
13+
import org.junit.Test
14+
import org.postgresql.ds.PGSimpleDataSource
15+
import java.math.BigDecimal
16+
import java.sql.Date
17+
import java.sql.DriverManager
18+
import java.sql.ResultSet
19+
import java.sql.Time
20+
import java.sql.Timestamp
21+
import java.sql.Types
22+
import java.util.UUID
23+
import kotlin.reflect.KType
24+
25+
private const val URL = "jdbc:duckdb:"
26+
27+
object DuckDb : DbType("duckdb") {
28+
override val driverClassName = "org.duckdb.DuckDBDriver"
29+
30+
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? = null
31+
32+
override fun isSystemTable(tableMetadata: TableMetadata): Boolean =
33+
tableMetadata.schemaName?.lowercase()?.contains("information_schema") == true ||
34+
tableMetadata.schemaName?.lowercase()?.contains("system") == true
35+
36+
override fun buildTableMetadata(tables: ResultSet): TableMetadata =
37+
TableMetadata(
38+
tables.getString("TABLE_NAME"),
39+
tables.getString("TABLE_SCHEM"),
40+
tables.getString("TABLE_CAT"),
41+
)
42+
43+
override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? = null
44+
}
45+
46+
class DuckDbTest {
47+
48+
@Test
49+
fun `read dataframe from duckdb`() {
50+
val df = DriverManager.getConnection(URL).use { connection ->
51+
connection.prepareStatement(
52+
"""
53+
CREATE TABLE IF NOT EXISTS test_table (
54+
id INTEGER PRIMARY KEY,
55+
name VARCHAR,
56+
age INTEGER,
57+
salary DOUBLE,
58+
hire_date DATE
59+
)
60+
""".trimIndent(),
61+
).executeUpdate()
62+
63+
connection.prepareStatement(
64+
"""
65+
INSERT INTO test_table (id, name, age, salary, hire_date)
66+
VALUES
67+
(1, 'John Doe', 30, 50000.00, '2020-01-15'),
68+
(2, 'Jane Smith', 28, 55000.00, '2021-03-20'),
69+
(3, 'Bob Johnson', 35, 65000.00, '2019-11-10'),
70+
(4, 'Alice Brown', 32, 60000.00, '2020-07-01')
71+
""".trimIndent(),
72+
).executeUpdate()
73+
74+
DataFrame.readSqlTable(connection, "test_table", dbType = DuckDb)
75+
// DataFrame.readSqlQuery(connection, "SELECT * from test_table;", dbType = DuckDb)
76+
}
77+
78+
df.print(borders = true, columnTypes = true)
79+
}
80+
81+
@field:[JvmField Rule]
82+
val pg: SingleInstancePostgresRule = EmbeddedPostgresRules.singleInstance()
83+
84+
@Test // TODO
85+
fun `read postgres to duckdb to dataframe`() {
86+
val embeddedPg = pg.embeddedPostgres
87+
val dataSource = embeddedPg.postgresDatabase as PGSimpleDataSource
88+
89+
val dbname = dataSource.databaseName
90+
val username = dataSource.user
91+
val host = dataSource.serverNames.first()
92+
val port = dataSource.portNumbers.first()
93+
94+
val connection = dataSource.connection
95+
96+
// region filling the db
97+
98+
@Language("SQL")
99+
val createTableStatement = """
100+
CREATE TABLE IF NOT EXISTS table1 (
101+
id serial PRIMARY KEY,
102+
bigintCol bigint not null,
103+
smallintCol smallint not null,
104+
bigserialCol bigserial not null,
105+
booleanCol boolean not null,
106+
byteaCol bytea not null,
107+
characterCol character not null,
108+
characterNCol character(10) not null,
109+
charCol char not null,
110+
dateCol date not null,
111+
doubleCol double precision not null,
112+
integerCol integer,
113+
intArrayCol integer array,
114+
doubleArrayCol double precision array,
115+
dateArrayCol date array,
116+
textArrayCol text array,
117+
booleanArrayCol boolean array
118+
)
119+
"""
120+
connection.createStatement().execute(createTableStatement.trimIndent())
121+
122+
@Language("SQL")
123+
val createTableQuery = """
124+
CREATE TABLE IF NOT EXISTS table2 (
125+
id serial PRIMARY KEY,
126+
moneyCol money not null,
127+
numericCol numeric not null,
128+
realCol real not null,
129+
smallintCol smallint not null,
130+
serialCol serial not null,
131+
textCol text,
132+
timeCol time not null,
133+
timeWithZoneCol time with time zone not null,
134+
timestampCol timestamp not null,
135+
timestampWithZoneCol timestamp with time zone not null,
136+
uuidCol uuid not null
137+
)
138+
"""
139+
connection.createStatement().execute(createTableQuery.trimIndent())
140+
141+
@Language("SQL")
142+
val insertData1 = """
143+
INSERT INTO table1 (
144+
bigintCol, smallintCol, bigserialCol, booleanCol,
145+
byteaCol, characterCol, characterNCol, charCol,
146+
dateCol, doubleCol,
147+
integerCol, intArrayCol,
148+
doubleArrayCol, dateArrayCol, textArrayCol, booleanArrayCol
149+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
150+
"""
151+
152+
@Language("SQL")
153+
val insertData2 = """
154+
INSERT INTO table2 (
155+
moneyCol, numericCol,
156+
realCol, smallintCol,
157+
serialCol, textCol, timeCol,
158+
timeWithZoneCol, timestampCol, timestampWithZoneCol,
159+
uuidCol
160+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
161+
"""
162+
163+
val intArray = connection.createArrayOf("INTEGER", arrayOf(1, 2, 3))
164+
val doubleArray = connection.createArrayOf("DOUBLE", arrayOf(1.1, 2.2, 3.3))
165+
val dateArray = connection.createArrayOf(
166+
"DATE",
167+
arrayOf(Date.valueOf("2023-08-01"), Date.valueOf("2023-08-02")),
168+
)
169+
val textArray = connection.createArrayOf("TEXT", arrayOf("Hello", "World"))
170+
val booleanArray = connection.createArrayOf("BOOLEAN", arrayOf(true, false, true))
171+
172+
connection.prepareStatement(insertData1).use { st ->
173+
// Insert data into table1
174+
for (i in 1..3) {
175+
st.setLong(1, i * 1000L)
176+
st.setShort(2, 11.toShort())
177+
st.setLong(3, 1000000000L + i)
178+
st.setBoolean(4, i % 2 == 1)
179+
st.setBytes(5, byteArrayOf(1, 2, 3))
180+
st.setString(6, "A")
181+
st.setString(7, "Hello")
182+
st.setString(8, "A")
183+
st.setDate(9, Date.valueOf("2023-08-01"))
184+
st.setDouble(10, 12.34)
185+
st.setInt(11, 12345 * i)
186+
st.setArray(12, intArray)
187+
st.setArray(13, doubleArray)
188+
st.setArray(14, dateArray)
189+
st.setArray(15, textArray)
190+
st.setArray(16, booleanArray)
191+
st.executeUpdate()
192+
}
193+
}
194+
195+
connection.prepareStatement(insertData2).use { st ->
196+
// Insert data into table2
197+
for (i in 1..3) {
198+
st.setBigDecimal(1, BigDecimal("123.45"))
199+
st.setBigDecimal(2, BigDecimal("12.34"))
200+
st.setFloat(3, 12.34f)
201+
st.setInt(4, 1000 + i)
202+
st.setInt(5, 1000000 + i)
203+
st.setString(6, null)
204+
st.setTime(7, Time.valueOf("12:34:56"))
205+
st.setTimestamp(8, Timestamp(System.currentTimeMillis()))
206+
st.setTimestamp(9, Timestamp(System.currentTimeMillis()))
207+
st.setTimestamp(10, Timestamp(System.currentTimeMillis()))
208+
st.setObject(11, UUID.randomUUID(), Types.OTHER)
209+
st.executeUpdate()
210+
}
211+
}
212+
213+
// endregion
214+
215+
var df1: AnyFrame
216+
var df2: AnyFrame
217+
218+
DriverManager.getConnection(URL).use { connection ->
219+
220+
// install and load PostgreSQL
221+
connection.createStatement().execute("INSTALL postgres; LOAD postgres;")
222+
223+
// attach the database and USE it
224+
connection.createStatement().execute(
225+
"ATTACH 'dbname=$dbname user=$username host=$host port=$port' AS db (TYPE postgres, SCHEMA 'public'); USE db;",
226+
)
227+
228+
// df1 = DataFrame.readSqlTable(connection, "table1", dbType = DuckDb)
229+
df1 = DataFrame.readSqlQuery(connection, "SELECT * from table1", dbType = DuckDb)
230+
// df2 = DataFrame.readSqlTable(connection, "table2", dbType = DuckDb)
231+
df2 = DataFrame.readSqlQuery(connection, "SELECT * from table2", dbType = DuckDb)
232+
}
233+
234+
df1.print(columnTypes = true, borders = true)
235+
df1.mapToFrame {
236+
expr { "colA"<Int>() + "colB"<Double>() } into "sum"
237+
"sum" from { "colA"<Int>() + "colB"<Double>() }
238+
}
239+
240+
df2.print(columnTypes = true, borders = true)
241+
}
242+
}

gradle/libs.versions.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ junit-platform = "1.11.3"
4545
kotestAsserions = "6.0.0.M1"
4646

4747
jsoup = "1.18.3"
48-
arrow = "18.1.0"
48+
arrow = "18.3.0"
4949
arrow-adbc = "0.18.0"
5050
embedded-postgresql = "2.1.0"
5151
kodex = "0.4.4"
@@ -56,7 +56,7 @@ shadow = "8.3.5"
5656
android-gradle-api = "7.3.1" # need to revise our tests to update
5757
ktor = "3.0.1" # needs jupyter compatibility with Kotlin 2.1 to update
5858
kotlin-compile-testing = "1.6.0"
59-
duckdb = "1.1.3"
59+
duckdb = "1.2.2.0"
6060
buildconfig = "5.5.1"
6161
benchmark = "0.4.12"
6262

@@ -126,6 +126,7 @@ arrow-format = { group = "org.apache.arrow", name = "arrow-format", version.ref
126126
arrow-vector = { group = "org.apache.arrow", name = "arrow-vector", version.ref = "arrow" }
127127
arrow-memory = { group = "org.apache.arrow", name = "arrow-memory-unsafe", version.ref = "arrow" }
128128
arrow-c-data = { group = "org.apache.arrow", name = "arrow-c-data", version.ref = "arrow" }
129+
arrow-dataset = { group = "org.apache.arrow", name = "arrow-dataset", version.ref = "arrow" }
129130

130131

131132
# https://repo.maven.apache.org/maven2/org/apache/arrow/adbc/adbc-driver-jdbc/18.0.0/adbc-driver-jdbc-18.0.0.pom

0 commit comments

Comments
 (0)