@@ -26,14 +26,28 @@ import XCTest
2626@available ( iOS 15 . 0 , macOS 12 . 0 , macCatalyst 15 . 0 , tvOS 15 . 0 , watchOS 8 . 0 , * )
2727final class MultimodalSnippets : XCTestCase {
// Stored fixtures for the snippets in this class: `bundle` is obtained from `BundleTestUtil.bundle()` and
// `model` is built lazily through `VertexAI.vertexAI().generativeModel(modelName:)`.
// The two `model` lines below are the removed (-) and added (+) sides of the same declaration; the change
// switches the model name from gemini-1.5-flash to gemini-2.0-flash.
2828 let bundle = BundleTestUtil . bundle ( )
29- lazy var model = VertexAI . vertexAI ( ) . generativeModel ( modelName: " gemini-1.5 -flash " )
29+ lazy var model = VertexAI . vertexAI ( ) . generativeModel ( modelName: " gemini-2.0 -flash " )
/// URL of the `animals.mp4` resource looked up in `bundle`; resolved on first access.
/// Traps with `fatalError` when the bundle does not contain the file.
lazy var videoURL = {
  if let location = bundle.url(forResource: "animals", withExtension: "mp4") {
    return location
  }
  fatalError("Video file animals.mp4 not found in Resources.")
}()
3636
/// URL of the `hello-world.mp3` resource looked up in `bundle`; resolved on first access.
/// Traps with `fatalError` when the bundle does not contain the file.
lazy var audioURL = {
  if let location = bundle.url(forResource: "hello-world", withExtension: "mp3") {
    return location
  }
  fatalError("Audio file hello-world.mp3 not found in Resources.")
}()
43+
/// URL of the `gemini-report.pdf` resource looked up in `bundle`; resolved on first access.
/// Traps with `fatalError` when the bundle does not contain the file.
lazy var pdfURL = {
  if let location = bundle.url(forResource: "gemini-report", withExtension: "pdf") {
    return location
  }
  fatalError("PDF file gemini-report.pdf not found in Resources.")
}()
50+
/// XCTest per-test setup hook: invokes `FirebaseApp.configureDefaultAppForSnippets()` and propagates
/// any error that call throws.
3751 override func setUpWithError( ) throws {
3852 try FirebaseApp . configureDefaultAppForSnippets ( )
3953 }
@@ -42,6 +56,8 @@ final class MultimodalSnippets: XCTestCase {
4256 await FirebaseApp . deleteDefaultAppForSnippets ( )
4357 }
4458
59+ // MARK: - Image Input
60+
4561 #if canImport(UIKit)
4662 func testMultimodalOneImageNonStreaming( ) async throws {
4763 guard let image = UIImage ( systemName: " bicycle " ) else { fatalError ( ) }
@@ -98,6 +114,8 @@ final class MultimodalSnippets: XCTestCase {
98114 }
99115 #endif // canImport(UIKit)
100116
117+ // MARK: - Video Input
118+
101119 func testMultimodalVideoNonStreaming( ) async throws {
102120 // Provide the video as `Data` with the appropriate MIME type
103121 let video = try InlineDataPart ( data: Data ( contentsOf: videoURL) , mimeType: " video/mp4 " )
@@ -125,4 +143,73 @@ final class MultimodalSnippets: XCTestCase {
125143 }
126144 }
127145 }
146+
147+ // MARK: - Audio Input
148+
/// Snippet: sends the bundled MP3 together with a text prompt through `generateContent`
/// and prints the text of the returned response.
func testMultiModalAudioNonStreaming() async throws {
  // Read the audio file into memory and wrap it as inline data tagged with its MIME type
  let audioPart = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")

  // Text instruction sent alongside the audio
  let instruction = "Transcribe what's said in this audio recording."

  // `generateContent` is awaited once and yields a single response
  let result = try await model.generateContent(audioPart, instruction)

  // `text` is optional, so print a placeholder when it is nil
  if let transcript = result.text {
    print(transcript)
  } else {
    print("No text in response.")
  }
}
162+
/// Snippet: sends the bundled MP3 together with a text prompt through `generateContentStream`
/// and prints the text of each chunk as it arrives.
func testMultiModalAudioStreaming() async throws {
  // Read the audio file into memory and wrap it as inline data tagged with its MIME type
  let audioPart = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")

  // Text instruction sent alongside the audio
  let instruction = "Transcribe what's said in this audio recording."

  // `generateContentStream` hands back a sequence that is iterated with `for try await`
  let stream = try model.generateContentStream(audioPart, instruction)

  // Chunks whose `text` is nil are skipped; everything else is printed
  for try await piece in stream {
    guard let fragment = piece.text else { continue }
    print(fragment)
  }
}
180+
181+ // MARK: - Document Input
182+
/// Snippet: sends the bundled PDF together with a text prompt through `generateContentStream`
/// and prints the text of each chunk as it arrives.
func testMultiModalPDFStreaming() async throws {
  // Read the PDF into memory and wrap it as inline data tagged with its MIME type
  let pdfPart = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")

  // Text instruction sent alongside the PDF file
  let instruction = "Summarize the important results in this report."

  // `generateContentStream` hands back a sequence that is iterated with `for try await`
  let stream = try model.generateContentStream(pdfPart, instruction)

  // Chunks whose `text` is nil are skipped; everything else is printed
  for try await piece in stream {
    guard let fragment = piece.text else { continue }
    print(fragment)
  }
}
201+
/// Snippet: sends the bundled PDF together with a text prompt through `generateContent`
/// and prints the text of the returned response.
func testMultiModalPDFNonStreaming() async throws {
  // Read the PDF into memory and wrap it as inline data tagged with its MIME type
  let pdfPart = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")

  // Text instruction sent alongside the PDF file
  let instruction = "Summarize the important results in this report."

  // `generateContent` is awaited once and yields a single response
  let result = try await model.generateContent(pdfPart, instruction)

  // `text` is optional, so print a placeholder when it is nil
  if let summary = result.text {
    print(summary)
  } else {
    print("No text in response.")
  }
}
128215}
0 commit comments