@@ -4,27 +4,23 @@
  * @fileoverview The `DatabricksDriver` and related types declaration.
  */
 
+import { assertDataSource, getEnv } from '@cubejs-backend/shared';
 import {
-  getEnv,
-  assertDataSource,
-} from '@cubejs-backend/shared';
-import {
+  DatabaseStructure,
   DriverCapabilities,
+  GenericDataBaseType,
   QueryColumnsResult,
   QueryOptions,
   QuerySchemasResult,
   QueryTablesResult,
-  UnloadOptions,
-  GenericDataBaseType,
   TableColumn,
-  DatabaseStructure,
+  UnloadOptions,
 } from '@cubejs-backend/base-driver';
-import {
-  JDBCDriver,
-  JDBCDriverConfiguration,
-} from '@cubejs-backend/jdbc-driver';
+import { JDBCDriver, JDBCDriverConfiguration } from '@cubejs-backend/jdbc-driver';
 import { DatabricksQuery } from './DatabricksQuery';
-import { resolveJDBCDriver, extractUidFromJdbcUrl } from './helpers';
+import { extractUidFromJdbcUrl, resolveJDBCDriver } from './helpers';
+
+const SUPPORTED_BUCKET_TYPES = ['s3', 'gcs', 'azure'];
 
 export type DatabricksDriverConfiguration = JDBCDriverConfiguration &
   {
@@ -103,6 +99,11 @@ export type DatabricksDriverConfiguration = JDBCDriverConfiguration &
      * Azure service principal client secret
      */
     azureClientSecret?: string,
+
+    /**
+     * GCS credentials JSON content
+     */
+    gcsCredentials?: string,
   };
 
 type ShowTableRow = {
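The new `gcsCredentials` option slots into the existing export-bucket settings above. As a quick illustration, here is a minimal sketch of a driver configured for a GCS export bucket under these changes; the import path assumes this file's package, and the bucket name and key content are placeholders. In a real deployment the values would normally come from env vars instead, since the constructor below falls back to `getEnv('dbExportGCSCredentials')`.

```typescript
import { DatabricksDriver } from '@cubejs-backend/databricks-jdbc-driver';

// Sketch only: bucket name and key JSON are placeholders, not working values.
const driver = new DatabricksDriver({
  bucketType: 'gcs',
  exportBucket: 'gs://my-cube-export-bucket',
  // Content of a GCP service account key file, passed as a JSON string:
  gcsCredentials: JSON.stringify({
    type: 'service_account',
    project_id: 'my-project',
    // ...remaining service account key fields
  }),
});
```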
@@ -209,7 +210,7 @@ export class DatabricksDriver extends JDBCDriver {
       // common export bucket config
       bucketType:
         conf?.bucketType ||
-        getEnv('dbExportBucketType', { supported: ['s3', 'azure'], dataSource }),
+        getEnv('dbExportBucketType', { supported: SUPPORTED_BUCKET_TYPES, dataSource }),
       exportBucket:
         conf?.exportBucket ||
         getEnv('dbExportBucket', { dataSource }),
@@ -246,6 +247,10 @@ export class DatabricksDriver extends JDBCDriver {
       azureClientSecret:
         conf?.azureClientSecret ||
         getEnv('dbExportBucketAzureClientSecret', { dataSource }),
+      // GCS credentials
+      gcsCredentials:
+        conf?.gcsCredentials ||
+        getEnv('dbExportGCSCredentials', { dataSource }),
     };
     if (config.readOnly === undefined) {
       // we can set readonly to true if there is no bucket config provided
@@ -429,8 +434,7 @@ export class DatabricksDriver extends JDBCDriver {
         metadata[database] = {};
       }
 
-      const columns = await this.tableColumnTypes(`${database}.${tableName}`);
-      metadata[database][tableName] = columns;
+      metadata[database][tableName] = await this.tableColumnTypes(`${database}.${tableName}`);
     }));
 
     return metadata;
@@ -527,7 +531,7 @@ export class DatabricksDriver extends JDBCDriver {
    * Returns table columns types.
    */
   public override async tableColumnTypes(table: string): Promise<TableColumn[]> {
-    let tableFullName = '';
+    let tableFullName: string;
     const tableArray = table.split('.');
 
     if (tableArray.length === 3) {
@@ -643,7 +647,7 @@ export class DatabricksDriver extends JDBCDriver {
    * export bucket data.
    */
   public async unload(tableName: string, options: UnloadOptions) {
-    if (!['azure', 's3'].includes(this.config.bucketType as string)) {
+    if (!SUPPORTED_BUCKET_TYPES.includes(this.config.bucketType as string)) {
       throw new Error(`Unsupported export bucket type: ${
         this.config.bucketType
       }`);
@@ -733,6 +737,12 @@ export class DatabricksDriver extends JDBCDriver {
         url.host,
         objectSearchPrefix,
       );
+    } else if (this.config.bucketType === 'gcs') {
+      return this.extractFilesFromGCS(
+        { credentials: this.config.gcsCredentials },
+        url.host,
+        objectSearchPrefix,
+      );
     } else {
       throw new Error(`Unsupported export bucket type: ${
         this.config.bucketType
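`extractFilesFromGCS` is inherited from the base driver and is not part of this diff. For orientation, a rough sketch of a helper with the same call shape, built on `@google-cloud/storage`; the JSON parsing, `.csv` filtering, and URL signing here are assumptions for illustration, not the actual base-driver implementation.

```typescript
import { Storage } from '@google-cloud/storage';

// A sketch matching the (gcsConfig, bucketName, prefix) call shape above.
// Assumed behavior: list exported CSV parts and return signed download URLs.
async function extractFilesFromGCS(
  gcsConfig: { credentials?: string },
  bucketName: string,
  objectSearchPrefix: string,
): Promise<string[]> {
  // gcsCredentials holds the service account key file content as a JSON string.
  const credentials = gcsConfig.credentials
    ? JSON.parse(gcsConfig.credentials)
    : undefined;
  const storage = new Storage({ credentials });

  // List the objects Databricks wrote under the table's prefix.
  const [files] = await storage
    .bucket(bucketName)
    .getFiles({ prefix: objectSearchPrefix });

  // Return a signed URL per CSV part so consumers can download them.
  return Promise.all(
    files
      .filter((file) => file.name.endsWith('.csv'))
      .map(async (file) => {
        const [url] = await file.getSignedUrl({
          action: 'read',
          expires: Date.now() + 60 * 60 * 1000, // valid for one hour
        });
        return url;
      }),
  );
}
```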
@@ -759,16 +769,22 @@ export class DatabricksDriver extends JDBCDriver {
    *
    * For Azure blob storage you need to configure account access key in
    * Cluster -> Configuration -> Advanced options
-   * (https://docs.databricks.com/data/data-sources/azure/azure-storage.html#access-azure-blob-storage-directly)
+   * https://docs.databricks.com/data/data-sources/azure/azure-storage.html#access-azure-blob-storage-directly
    *
    * `fs.azure.account.key.<storage-account-name>.blob.core.windows.net <storage-account-access-key>`
    *
    * For S3 bucket storage you need to configure AWS access key and secret in
    * Cluster -> Configuration -> Advanced options
-   * (https://docs.databricks.com/data/data-sources/aws/amazon-s3.html#access-s3-buckets-directly)
+   * https://docs.databricks.com/data/data-sources/aws/amazon-s3.html#access-s3-buckets-directly
    *
    * `fs.s3a.access.key <aws-access-key>`
    * `fs.s3a.secret.key <aws-secret-key>`
+   *
+   * For Google Cloud Storage you can configure storage credentials and create an external location to access it,
+   * or configure an account service key (legacy):
+   * https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/storage-credentials
+   * https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/external-locations
+   * https://docs.databricks.com/aws/en/connect/storage/gcs
    */
   private async createExternalTableFromSql(tableFullName: string, sql: string, params: unknown[], columns: ColumnInfo[]) {
     let select = sql;
@@ -780,15 +796,15 @@ export class DatabricksDriver extends JDBCDriver {
     try {
       await this.query(
         `
-        CREATE TABLE ${tableFullName}
-        USING CSV LOCATION '${this.config.exportBucketMountDir || this.config.exportBucket}/${tableFullName}.csv'
+        CREATE TABLE ${tableFullName}_tmp
+        USING CSV LOCATION '${this.config.exportBucketMountDir || this.config.exportBucket}/${tableFullName}'
         OPTIONS (escape = '"')
         AS (${select});
         `,
         params,
       );
     } finally {
-      await this.query(`DROP TABLE IF EXISTS ${tableFullName};`, []);
+      await this.query(`DROP TABLE IF EXISTS ${tableFullName}_tmp;`, []);
     }
   }
 
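Two things change in the statement above: the external table used for the CSV export now gets a dedicated `${tableFullName}_tmp` name, so creating and dropping it cannot collide with a real table of the same name, and the bucket LOCATION drops the `.csv` suffix, since Spark writes a directory of CSV part files there that the extraction step later lists by prefix. A worked rendering with hypothetical values:

```typescript
// With (hypothetical values):
//   this.config.exportBucket = 'gs://my-cube-export-bucket'
//   tableFullName            = 'prod_pre_aggregations.orders_main'
// the query above renders to:
//
//   CREATE TABLE prod_pre_aggregations.orders_main_tmp
//   USING CSV LOCATION 'gs://my-cube-export-bucket/prod_pre_aggregations.orders_main'
//   OPTIONS (escape = '"')
//   AS (SELECT ...);
//
// and the cleanup in the finally block drops only the temporary table:
//
//   DROP TABLE IF EXISTS prod_pre_aggregations.orders_main_tmp;
```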
@@ -798,30 +814,36 @@ export class DatabricksDriver extends JDBCDriver {
    *
    * For Azure blob storage you need to configure account access key in
    * Cluster -> Configuration -> Advanced options
-   * (https://docs.databricks.com/data/data-sources/azure/azure-storage.html#access-azure-blob-storage-directly)
+   * https://docs.databricks.com/data/data-sources/azure/azure-storage.html#access-azure-blob-storage-directly
    *
    * `fs.azure.account.key.<storage-account-name>.blob.core.windows.net <storage-account-access-key>`
    *
    * For S3 bucket storage you need to configure AWS access key and secret in
    * Cluster -> Configuration -> Advanced options
-   * (https://docs.databricks.com/data/data-sources/aws/amazon-s3.html#access-s3-buckets-directly)
+   * https://docs.databricks.com/data/data-sources/aws/amazon-s3.html#access-s3-buckets-directly
    *
    * `fs.s3a.access.key <aws-access-key>`
    * `fs.s3a.secret.key <aws-secret-key>`
+   *
+   * For Google Cloud Storage you can configure storage credentials and create an external location to access it,
+   * or configure an account service key (legacy):
+   * https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/storage-credentials
+   * https://docs.databricks.com/gcp/en/connect/unity-catalog/cloud-storage/external-locations
+   * https://docs.databricks.com/aws/en/connect/storage/gcs
    */
   private async createExternalTableFromTable(tableFullName: string, columns: ColumnInfo[]) {
     try {
       await this.query(
         `
-        CREATE TABLE _${tableFullName}
-        USING CSV LOCATION '${this.config.exportBucketMountDir || this.config.exportBucket}/${tableFullName}.csv'
+        CREATE TABLE ${tableFullName}_tmp
+        USING CSV LOCATION '${this.config.exportBucketMountDir || this.config.exportBucket}/${tableFullName}'
         OPTIONS (escape = '"')
         AS SELECT ${this.generateTableColumnsForExport(columns)} FROM ${tableFullName}
         `,
         [],
       );
     } finally {
-      await this.query(`DROP TABLE IF EXISTS _${tableFullName};`, []);
+      await this.query(`DROP TABLE IF EXISTS ${tableFullName}_tmp;`, []);
     }
   }
 }
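Note that the old `_${tableFullName}` prefix also misbehaved for schema-qualified names (`_schema.table` targets the wrong schema), which the `_tmp` suffix avoids. Taken together, a hypothetical end-to-end call of the export path under these changes; the table name and option value are placeholders, and the exact return shape of `unload` follows the base driver:

```typescript
// Sketch: assumes `driver` was configured with bucketType 'gcs' as above.
const result = await driver.unload('prod_pre_aggregations.orders_main', {
  maxFileSize: 64, // per UnloadOptions; target size of each exported part
});
// The driver materializes the table under
// gs://<exportBucket>/prod_pre_aggregations.orders_main/ via the _tmp
// external table, then extractFilesFromGCS returns URLs for the CSV parts.
```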