diff --git a/llama_proxy_man/config.yaml b/llama_proxy_man/config.yaml
index 8aba5d6..c658f7a 100644
--- a/llama_proxy_man/config.yaml
+++ b/llama_proxy_man/config.yaml
@@ -5,30 +5,37 @@ models:
   - name: "tabby-code"
     port: 18080
     internal_port: 28080
-    vram_usage: 8.25G
-    ram_usage: 2.6G
+    autostart: true
+    vram_usage: 26.7G # Coder-32B + draft 0.5B
+    ram_usage: 3G # Coder-32B + draft 0.5B
+    # vram_usage: 8.25G # Coder-7B
+    # ram_usage: 2.6G # Coder-7B
     env:
       CUDA_VISIBLE_DEVICES: 0
       HSA_OVERRIDE_GFX_VERSION: '11.0.0'
     args:
-      model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-7B-Instruct-Q6_K_L.gguf
-      gpu-layers: 9999
+      # host: 0.0.0.0
       flash-attn: true
       ctx-size: 32768
-      host: 0.0.0.0
+      model: /media/SanDisk/ai/models_live/Qwen2.5-Coder-32B-Instruct-IQ4_XS.gguf
+      gpu-layers: 9999
+      model-draft: /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Q8_0.gguf
+      gpu-layers-draft: 9999
+      draft-max: 16
+      draft-min: 5
   - name: "tabby-embeddings"
     port: 18081
     internal_port: 28081
-    vram_usage: 1G
-    ram_usage: 2G
+    vram_usage: 0.4G
+    ram_usage: 2.5G
     env:
       CUDA_VISIBLE_DEVICES: 0
       HSA_OVERRIDE_GFX_VERSION: '11.0.0'
     args:
       model: /media/SanDisk/ai/models_live/nomic-embed-text-v1-f32.gguf
-      gpu-layers: 9999
+      gpu-layers: -1
       flash-attn: true
-      host: 0.0.0.0
+      # host: 0.0.0.0
       embeddings: true
   - name: "big-chat"
     port: 18082
@@ -43,4 +50,21 @@ models:
       gpu-layers: 9999
       ctx-size: 16384
       flash-attn: true
-      host: 0.0.0.0
+      # host: 0.0.0.0
+  - name: "bigger-chat"
+    port: 18083
+    internal_port: 28083
+    vram_usage: 29G
+    ram_usage: 4G
+    env:
+      CUDA_VISIBLE_DEVICES: 0
+      HSA_OVERRIDE_GFX_VERSION: '11.0.0'
+    args:
+      model: /media/SanDisk/ai/models_live/Llama-3.1-Nemotron-70B-Instruct-HF-IQ3_XXS.gguf
+      gpu-layers: 9999
+      flash-attn: true
+      # ctx-size: 8192
+      ctx-size: 16384
+      # host: 0.0.0.0
+      cache-type-k: q8_0
+      cache-type-v: q8_0
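
Assuming llama_proxy_man forwards each `args` key verbatim as the matching llama-server long flag and launches the server on `internal_port` (both assumptions about the proxy's spawn logic, not shown in this diff), the updated "tabby-code" entry would expand to roughly the command below. The model-draft / gpu-layers-draft / draft-max / draft-min flags enable speculative decoding, with the 0.5B model proposing 5-16 draft tokens per step for the 32B target:

    # hypothetical expansion of the "tabby-code" entry above
    CUDA_VISIBLE_DEVICES=0 HSA_OVERRIDE_GFX_VERSION=11.0.0 \
    llama-server \
      --port 28080 \
      --flash-attn \
      --ctx-size 32768 \
      --model /media/SanDisk/ai/models_live/Qwen2.5-Coder-32B-Instruct-IQ4_XS.gguf \
      --gpu-layers 9999 \
      --model-draft /media/SanDisk/ai/models_live/Qwen2.5-Coder-0.5B-Q8_0.gguf \
      --gpu-layers-draft 9999 \
      --draft-max 16 \
      --draft-min 5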