feat: Support DBRX model in Llama

reneleonhardt · reneleonhardt · commit b9421adec6f6 · 2024-04-21T08:46:43.000+02:00
diff --git a/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java b/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java
@@ -52,7 +52,14 @@ public enum HuggingFaceModel {
   LLAMA_3_8B_Q8_0(8, 8, "Meta-Llama-3-8B-Instruct-Q8_0.gguf", "lmstudio-community"),
   LLAMA_3_70B_IQ1(70, 1, "Meta-Llama-3-70B-Instruct-IQ1_M.gguf", "lmstudio-community"),
   LLAMA_3_70B_IQ2_XS(70, 2, "Meta-Llama-3-70B-Instruct-IQ2_XS.gguf", "lmstudio-community"),
-  LLAMA_3_70B_Q4_K_M(70, 4, "Meta-Llama-3-70B-Instruct-Q4_K_M.gguf", "lmstudio-community");
+  LLAMA_3_70B_Q4_K_M(70, 4, "Meta-Llama-3-70B-Instruct-Q4_K_M.gguf", "lmstudio-community"),
+
+  DBRX_12B_Q3_K_M(12, 3, "dbrx-16x12b-instruct-q3_k_m-gguf", "phymbert"),
+  DBRX_12B_Q4_0(12, 4, "dbrx-16x12b-instruct-q4_0-gguf", "phymbert"),
+  DBRX_12B_Q6_K(12, 6, "dbrx-16x12b-instruct-q6_k-gguf", "phymbert"),
+  DBRX_12B_Q8_0(12, 8, "dbrx-16x12b-instruct-q8_0-gguf", "phymbert"),
+  DBRX_12B_Q3_S(12, 3, "dbrx-16x12b-instruct-iq3_s-gguf", "phymbert"), // model: iq3_s
+  DBRX_12B_Q3_XXS(12, 3, "dbrx-16x12b-instruct-iq3_xxs-gguf", "phymbert"); // model: iq3_xxs
 
   private final int parameterSize;
   private final int quantization;
@@ -86,7 +93,8 @@ public String getFileName() {
     if ("TheBloke".equals(user)) {
       return modelName.toLowerCase().replace("-gguf", format(".Q%d_K_M.gguf", quantization));
     }
-    return modelName;
+    // TODO: Download all 10 shards. NOTE(review): lowercases every name reaching this fallback.
+    return modelName.toLowerCase().replace("-gguf", "-00001-of-00010.gguf");
   }
 
   public URL getFileURL() {
diff --git a/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java b/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java
@@ -99,7 +99,21 @@ public enum LlamaModel {
           HuggingFaceModel.LLAMA_3_8B_Q8_0,
           HuggingFaceModel.LLAMA_3_70B_IQ1,
           HuggingFaceModel.LLAMA_3_70B_IQ2_XS,
-          HuggingFaceModel.LLAMA_3_70B_Q4_K_M));
+          HuggingFaceModel.LLAMA_3_70B_Q4_K_M)),
+  DBRX(
+      "DBRX",
+      "DBRX is a Mixture-of-Experts (MoE) model with 132B total parameters and 36B live parameters."
+          + " Generation speed is significantly faster than LLaMA2-70B, while at the same time "
+          + "beating other open source models, such as, LLaMA2-70B, Mixtral, and Grok-1 on "
+          + "language understanding, programming, math, and logic.",
+      PromptTemplate.CHAT_ML,
+      List.of(
+          HuggingFaceModel.DBRX_12B_Q3_K_M,
+          HuggingFaceModel.DBRX_12B_Q4_0,
+          HuggingFaceModel.DBRX_12B_Q6_K,
+          HuggingFaceModel.DBRX_12B_Q8_0,
+          HuggingFaceModel.DBRX_12B_Q3_S,
+          HuggingFaceModel.DBRX_12B_Q3_XXS));
 
   private final String label;
   private final String description;