@@ -20,6 +20,7 @@ import org.apache.poi.util.DefaultTempFileCreationStrategy
20
20
import org.apache.poi.util.LocaleUtil
21
21
import org.apache.poi.util.LocaleUtil.getUserTimeZone
22
22
import org.apache.poi.util.TempFile
23
+ import org.apache.poi.xssf.streaming.SXSSFWorkbook
23
24
import org.apache.poi.xssf.usermodel.XSSFWorkbook
24
25
import org.jetbrains.kotlinx.dataframe.AnyFrame
25
26
import org.jetbrains.kotlinx.dataframe.AnyRow
@@ -470,6 +471,28 @@ private fun Cell?.cellValue(sheetName: String): Any? {
470
471
return getValueFromType(cellType)
471
472
}
472
473
474
/**
 * Excel workbook formats supported by the `writeExcel` functions.
 */
public enum class WorkBookType {
    /** Binary XLS format; written through Apache POI `HSSFWorkbook`. */
    XLS,

    /** XLSX format; written through Apache POI `XSSFWorkbook` or, for new files, `SXSSFWorkbook`. */
    XLSX,
}
478
+
479
/**
 * Writes this DataFrame as a single sheet of the Excel file located at [path].
 *
 * Convenience overload: wraps [path] in a [File] and delegates to the [File]-based `writeExcel`.
 *
 * Implemented with [Apache POI](https://poi.apache.org) using `HSSFWorkbook` for XLS files,
 * `XSSFWorkbook` for standard XLSX files, and `SXSSFWorkbook` for memory-efficient streaming
 * when creating new XLSX files.
 *
 * @param path The path to the file where the data will be written.
 * @param columnsSelector A [selector][ColumnsSelector] to determine which columns to include in the file.
 * The default is all columns.
 * @param sheetName The name of the sheet in the Excel file. If null, the default name will be used.
 * @param writeHeader A flag indicating whether to write the header row in the Excel file. Defaults to true.
 * @param workBookType The [type of workbook][WorkBookType] to create (e.g., XLS or XLSX). Defaults to XLSX.
 * @param keepFile If `true` and the file already exists, a new sheet will be appended instead of
 * overwriting the file.
 * This may result in higher memory usage and slower performance compared to creating a new file.
 * Defaults to `false`.
 *
 * @throws [IllegalArgumentException] if the [sheetName] is invalid or workbook already contains a sheet with this name.
 */
public fun <T> DataFrame<T>.writeExcel(
    path: String,
    columnsSelector: ColumnsSelector<T, *> = { all() },
    // NOTE(review): the next three parameters are hidden by a hunk break in the reviewed diff;
    // they are reconstructed from the KDoc and the delegating call. Confirm against the real file.
    sheetName: String? = null,
    writeHeader: Boolean = true,
    workBookType: WorkBookType = WorkBookType.XLSX,
    keepFile: Boolean = false,
) {
    val target = File(path)
    writeExcel(target, columnsSelector, sheetName, writeHeader, workBookType, keepFile)
}
481
504
482
- public enum class WorkBookType {
483
- XLS ,
484
- XLSX ,
485
- }
486
-
505
/**
 * Writes this DataFrame to an Excel file as a single sheet.
 *
 * Implemented with [Apache POI](https://poi.apache.org) using `HSSFWorkbook` for XLS files,
 * `XSSFWorkbook` for standard XLSX files,
 * and `SXSSFWorkbook` for memory-efficient streaming when creating new XLSX files.
 *
 * @param file The file where the data will be written.
 * @param columnsSelector A [selector][ColumnsSelector] to determine which columns to include in the file.
 * The default is all columns.
 * @param sheetName The name of the sheet in the Excel file. If null, the default name will be used.
 * @param writeHeader A flag indicating whether to write the header row in the Excel file. Defaults to true.
 * @param workBookType The [type of workbook][WorkBookType] to create (e.g., XLS or XLSX). Defaults to XLSX.
 * @param keepFile If `true` and the file already exists and is not empty, the existing workbook is loaded
 * and a new sheet is appended instead of overwriting the file.
 * If the file is missing or empty, a new workbook is created as if the flag were `false`.
 * This may result in higher memory usage and slower performance compared to creating a new file.
 * Defaults to `false`.
 *
 * @throws [IllegalArgumentException] if the [sheetName] is invalid or workbook already contains a sheet with this name.
 */
public fun <T> DataFrame<T>.writeExcel(
    file: File,
    columnsSelector: ColumnsSelector<T, *> = { all() },
    // NOTE(review): the next three parameters are hidden by a hunk break in the reviewed diff;
    // they are reconstructed from the KDoc and their usages below. Confirm against the real file.
    sheetName: String? = null,
    writeHeader: Boolean = true,
    workBookType: WorkBookType = WorkBookType.XLSX,
    keepFile: Boolean = false,
) {
    // Appending only makes sense when there is an existing, non-empty workbook to load.
    val appendToExisting = keepFile && file.exists() && file.length() > 0L

    val factory: Workbook =
        if (appendToExisting) {
            // The workbook is built while the stream is open; `use` closes the stream
            // even when the constructor throws (e.g. on a corrupt file).
            file.inputStream().use { fis ->
                when (workBookType) {
                    WorkBookType.XLS -> HSSFWorkbook(fis)
                    WorkBookType.XLSX -> XSSFWorkbook(fis)
                }
            }
        } else {
            when (workBookType) {
                WorkBookType.XLS -> HSSFWorkbook()

                // Use streaming mode for a new XLSX file
                WorkBookType.XLSX -> SXSSFWorkbook()
            }
        }

    // The output stream is opened only after the existing content (if any) has been read,
    // because opening it truncates the file.
    file.outputStream().use {
        writeExcel(it, columnsSelector, sheetName, writeHeader, factory)
    }
}
511
551
552
+ /**
553
+ * Writes this DataFrame as an Excel sheet to the given [OutputStream], using an existing [Workbook] instance.
554
+ *
555
+ * Uses [Apache POI](https://poi.apache.org).
556
+ * Supports [XSSFWorkbook] and [SXSSFWorkbook] for XLSX and [HSSFWorkbook] for XLS,
557
+ * and allows users to manage the workbook externally.
558
+ *
559
+ * @param outputStream The output stream where the Excel data will be written.
560
+ * @param columnsSelector A [selector][ColumnsSelector] to determine which columns to include in the file. The default is all columns.
561
+ * @param sheetName The name of the sheet in the Excel file. If null, the default name will be used.
562
+ * @param writeHeader A flag indicating whether to write the header row in the Excel file. Defaults to true.
563
+ * @param factory The [Workbook] instance, allowing integration with an existing workbook.
564
+ *
565
+ * @throws [IllegalArgumentException] if the [sheetName] is invalid or workbook already contains a sheet with this name.
566
+ */
512
567
public fun <T > DataFrame<T>.writeExcel (
513
568
outputStream : OutputStream ,
514
569
columnsSelector : ColumnsSelector <T , * > = { all() },
@@ -522,6 +577,25 @@ public fun <T> DataFrame<T>.writeExcel(
522
577
wb.close()
523
578
}
524
579
580
+ /**
581
+ * Creates a new [Sheet] in the given [Workbook] and writes this DataFrame content into it.
582
+ *
583
+ * Uses [Apache POI](https://poi.apache.org).
584
+ * Supports [XSSFWorkbook] and [SXSSFWorkbook] for XLSX and [HSSFWorkbook] for XLS,
585
+ * and allows users to manage the workbook externally.
586
+ *
587
+ * Automatically handles datetime types.
588
+ * Skips null values to prevent Apache POI from treating empty cells incorrectly.
589
+ *
590
+ * @param wb The [Workbook] where the sheet will be created.
591
+ * @param columnsSelector A [selector][ColumnsSelector] to determine which columns to include. Defaults to all columns.
592
+ * @param sheetName The name of the sheet. If null, a default sheet name is used.
593
+ * @param writeHeader Whether to include a header row with column names. Defaults to true.
594
+ *
595
+ * @return The created [Sheet] instance containing the DataFrame data.
596
+ *
597
+ * @throws [IllegalArgumentException] if the [sheetName] is invalid or workbook already contains a sheet with this name.
598
+ */
525
599
public fun <T > DataFrame<T>.writeExcel (
526
600
wb : Workbook ,
527
601
columnsSelector : ColumnsSelector <T , * > = { all() },
0 commit comments