import Foundation
- let isQuantizedMode = CommandLine.arguments.contains("--quantized")
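+ // Parse the sample's CLI flags: --gguf selects the quantized GGUF weights,
+ // --phi-4 picks Phi-4 over the default Phi-3, and --cpu skips GPU setup.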
+ let (isGgufMode, isPhi4, isCpuMode) = (
+     CommandLine.arguments.contains("--gguf"),
+     CommandLine.arguments.contains("--phi-4"),
+     CommandLine.arguments.contains("--cpu")
+ )

- if isQuantizedMode {
-     print("🍃 Quantized mode is enabled.")
- } else {
-     print("💪 Safe tensors mode is enabled.")
- }
+ let formatMode = isGgufMode ? "🍃 GGUF" : "💪 Safe tensors"
+ let modelMode = isPhi4 ? "🚀 Phi-4" : "🚗 Phi-3"

- let modelProvider = isQuantizedMode ?
-     PhiModelProvider.huggingFaceGguf(modelRepo: "microsoft/Phi-3-mini-4k-instruct-gguf", modelFileName: "Phi-3-mini-4k-instruct-q4.gguf", modelRevision: "main") :
-     PhiModelProvider.huggingFace(modelRepo: "microsoft/Phi-3-mini-4k-instruct", modelRevision: "main")
+ print("\(formatMode) mode is enabled.\n\(modelMode) mode is enabled.")
+
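+ // Map each flag combination to a provider: the GGUF providers download a
+ // single quantized file, the safetensors providers pull weights from the repo.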
+ let modelProvider = switch (isGgufMode, isPhi4) {
+ case (true, true):
+     PhiModelProvider.huggingFaceGguf(
+         modelRepo: "microsoft/phi-4-gguf",
+         modelFileName: "phi-4-q4.gguf",
+         modelRevision: "main"
+     )
+ case (true, false):
+     PhiModelProvider.huggingFaceGguf(
+         modelRepo: "microsoft/Phi-3-mini-4k-instruct-gguf",
+         modelFileName: "Phi-3-mini-4k-instruct-q4.gguf",
+         modelRevision: "main"
+     )
+ case (false, true):
+     PhiModelProvider.huggingFace(
+         modelRepo: "microsoft/phi-4",
+         modelRevision: "main"
+     )
+ case (false, false):
+     PhiModelProvider.huggingFace(
+         modelRepo: "microsoft/Phi-3-mini-4k-instruct",
+         modelRevision: "main"
+     )
+ }

let inferenceOptionsBuilder = InferenceOptionsBuilder()
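+ // A fixed seed keeps runs reproducible; temperature 0.9 leaves room for creative output.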
try! inferenceOptionsBuilder.withTemperature(temperature: 0.9)
try! inferenceOptionsBuilder.withSeed(seed: 146628346)
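+ // Phi-4 expects the ChatML prompt format, so opt into it explicitly.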
+ if isPhi4 {
+     try! inferenceOptionsBuilder.withChatFormat(chatFormat: ChatFormat.chatMl)
+ }
let inferenceOptions = try! inferenceOptionsBuilder.build()

let cacheDir = FileManager.default.currentDirectoryPath.appending("/.cache")
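+ // Callbacks from the engine: lifecycle notifications plus each generated token.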
class ModelEventsHandler: PhiEventHandler {
-     func onInferenceStarted() {}
-
-     func onInferenceEnded() {}
-
+     func onInferenceStarted() {
+         print("ℹ️ Inference started...")
+     }
+     func onInferenceEnded() {
+         print("\nℹ️ Inference ended.")
+     }
    func onInferenceToken(token: String) {
        print(token, terminator: "")
    }
-
    func onModelLoaded() {
        print("""
-       🧠 Model loaded!
-       ****************************************
-       """)
+             🧠 Model loaded!
+             ****************************************
+             """)
    }
}

let modelBuilder = PhiEngineBuilder()
try! modelBuilder.withEventHandler(eventHandler: BoxedPhiEventHandler(handler: ModelEventsHandler()))
- let gpuEnabled = try! modelBuilder.tryUseGpu()
try! modelBuilder.withModelProvider(modelProvider: modelProvider)

- let model = try! modelBuilder.buildStateful(cacheDir: cacheDir, systemInstruction: "You are a hockey poet. Be brief and polite.")
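+ // Phi-4 needs its tokenizer fetched explicitly from the microsoft/phi-4 repo.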
+ if isPhi4 {
+     try! modelBuilder.withTokenizerProvider(tokenizerProvider: .huggingFace(
+         tokenizerRepo: "microsoft/phi-4",
+         tokenizerFileName: "tokenizer.json"
+     ))
+ }
+
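+ // Attempt GPU initialization unless --cpu was passed; tryUseGpu() reports
+ // whether a GPU backend actually came up, so the CPU fallback stays visible.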
+ if !isCpuMode {
+     let gpuEnabled = try! modelBuilder.tryUseGpu()
+     print(gpuEnabled ? "🎮 GPU mode enabled." : "💻 Tried GPU, but falling back to CPU.")
+ } else {
+     print("💻 CPU mode enabled.")
+ }

- // Run inference
- let result = try! model.runInference(promptText: "Write a haiku about ice hockey", inferenceOptions: inferenceOptions)
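+ // Build the stateful engine: model files are cached under cacheDir and the
+ // system instruction primes every conversation.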
+ let model = try! modelBuilder.buildStateful(
+     cacheDir: cacheDir,
+     systemInstruction: "You are a hockey poet. Be brief and polite."
+ )

- print("""
+ let result = try! model.runInference(
+     promptText: "Write a haiku about ice hockey",
+     inferenceOptions: inferenceOptions
+ )

+ print("""
****************************************
📝 Tokens Generated: \(result.tokenCount)
🖥️ Tokens per second: \(result.tokensPerSecond)
⏱️ Duration: \(result.duration)s
- 🏎️ GPU enabled: \(gpuEnabled)
""")