diff --git a/llama_proxy_man/config.yaml b/llama_proxy_man/config.yaml index eb637cc..25fb678 100644 --- a/llama_proxy_man/config.yaml +++ b/llama_proxy_man/config.yaml @@ -60,7 +60,7 @@ models: CUDA_VISIBLE_DEVICES: 0 HSA_OVERRIDE_GFX_VERSION: '11.0.0' args: - ctx-size: 16384 + ctx-size: 8192 flash-attn: true cache-type-k: q8_0 cache-type-v: q8_0