gptel-ollama: switch to chat API
* gptel-ollama.el (gptel-curl--parse-stream, gptel--parse-response,
gptel--request-data, gptel--parse-buffer, gptel--ollama-context,
gptel--ollama-token-count, gptel-make-ollama): Switch from Ollama's
completions API to its chat API.  This makes interacting with Ollama
fully stateless, like all the other backends, and should help
significantly with issues like #249 and #279.  Support non-streaming
responses from Ollama in the process.

Remove `gptel--ollama-context`, as it is no longer needed.  Add
`gptel--ollama-token-count` to track token costs.  A UI affordance
for this count is not implemented yet, but is planned.
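For illustration (not part of the diff): a sketch of how the request
payload changes with this commit.  Field names follow the code below;
the model name, message text, and option values are placeholders.

;; Old completions API (POST /api/generate): one prompt string, with
;; multi-turn state threaded through a server-supplied :context array
;; that had to be cached per buffer in gptel--ollama-context.
'(:model "mistral:latest"
  :system "You are a helpful assistant."
  :prompt "Write a haiku about Emacs."
  :context [1 2 3]   ; opaque array from the previous response
  :stream t)

;; New chat API (POST /api/chat): the whole conversation is resent as
;; a :messages array on every request, so no client-side state is
;; needed, and the system message travels as a regular message.
'(:model "mistral:latest"
  :messages [(:role "system" :content "You are a helpful assistant.")
             (:role "user" :content "Write a haiku about Emacs.")]
  :stream t
  ;; gptel-temperature and gptel-max-tokens map to Ollama's :options:
  :options (:temperature 0.7 :num_predict 256))

;; The final chunk of a response (:done t) reports prompt_eval_count
;; and eval_count, which now accumulate into gptel--ollama-token-count.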
commit 66a63e6c82
parent 9b094b8b1e

1 file changed, 56 insertions(+), 40 deletions(-)
@@ -34,14 +34,16 @@
                 (:copier nil)
                 (:include gptel-backend)))
 
-(defvar-local gptel--ollama-context nil
-  "Context for ollama conversations.
-
-This variable holds the context array for conversations with
-Ollama models.")
+(defvar-local gptel--ollama-token-count 0
+  "Token count for ollama conversations.
+
+This variable holds the total token count for conversations with
+Ollama models.
+
+Intended for internal use only.")
 
 (cl-defmethod gptel-curl--parse-stream ((_backend gptel-ollama) info)
-  ";TODO: "
+  "Parse response stream for the Ollama API."
   (when (bobp)
     (re-search-forward "^{")
     (forward-line 0))
@@ -50,55 +52,69 @@ Ollama models.")
         (while (setq content (gptel--json-read))
           (setq pt (point))
           (let ((done (map-elt content :done))
-                (response (map-elt content :response)))
+                (response (map-nested-elt content '(:message :content))))
             (push response content-strs)
             (unless (eq done :json-false)
-              (with-current-buffer (plist-get info :buffer)
-                (setq gptel--ollama-context (map-elt content :context)))
+              (cl-incf gptel--ollama-token-count
+                       (+ (or (map-elt content :prompt_eval_count) 0)
+                          (or (map-elt content :eval_count) 0)))
               (goto-char (point-max)))))
       (error (goto-char pt)))
     (apply #'concat (nreverse content-strs))))
 
 (cl-defmethod gptel--parse-response ((_backend gptel-ollama) response info)
-  (when-let ((context (map-elt response :context)))
-    (with-current-buffer (plist-get info :buffer)
-      (setq gptel--ollama-context context)))
-  (map-elt response :response))
+  "Parse a one-shot RESPONSE from the Ollama API."
+  (when-let ((context
+              (+ (or (map-elt response :prompt_eval_count) 0)
+                 (or (map-elt response :eval_count) 0))))
+    (cl-incf gptel--ollama-token-count context))
+  (map-nested-elt response '(:message :content)))
 
 (cl-defmethod gptel--request-data ((_backend gptel-ollama) prompts)
-  "JSON encode PROMPTS for Ollama."
+  "JSON encode PROMPTS for sending to ChatGPT."
   (let ((prompts-plist
          `(:model ,gptel-model
-           ,@prompts
+           :messages [,@prompts]
            :stream ,(or (and gptel-stream gptel-use-curl
                          (gptel-backend-stream gptel-backend))
-                     :json-false))))
-    (when gptel--ollama-context
-      (plist-put prompts-plist :context gptel--ollama-context))
+                     :json-false)))
+        options-plist)
+    (when gptel-temperature
+      (setq options-plist
+            (plist-put options-plist :temperature
+                       gptel-temperature)))
+    (when gptel-max-tokens
+      (setq options-plist
+            (plist-put options-plist :num_predict
+                       gptel-max-tokens)))
+    (when options-plist
+      (plist-put prompts-plist :options options-plist))
     prompts-plist))
 
-(cl-defmethod gptel--parse-buffer ((_backend gptel-ollama) &optional _max-entries)
-  (let ((prompts)
-        (prop (text-property-search-backward
-               'gptel 'response
-               (when (get-char-property (max (point-min) (1- (point)))
-                                        'gptel)
-                 t))))
-    (if (and (prop-match-p prop)
-             (prop-match-value prop))
-        (user-error "No user prompt found!")
-      (setq prompts (list
-                     :system gptel--system-message
-                     :prompt
-                     (if (prop-match-p prop)
-                         (string-trim
-                          (buffer-substring-no-properties (prop-match-beginning prop)
-                                                          (prop-match-end prop))
-                          (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
-                                  (regexp-quote (gptel-prompt-prefix-string)))
-                          (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
-                                  (regexp-quote (gptel-response-prefix-string))))
-                       "")))
-      prompts)))
+(cl-defmethod gptel--parse-buffer ((_backend gptel-ollama) &optional max-entries)
+  (let ((prompts) (prop))
+    (while (and
+            (or (not max-entries) (>= max-entries 0))
+            (setq prop (text-property-search-backward
+                        'gptel 'response
+                        (when (get-char-property (max (point-min) (1- (point)))
+                                                 'gptel)
+                          t))))
+      (push (list :role (if (prop-match-value prop) "assistant" "user")
+                  :content
+                  (string-trim
+                   (buffer-substring-no-properties (prop-match-beginning prop)
+                                                   (prop-match-end prop))
+                   (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
+                           (regexp-quote (gptel-prompt-prefix-string)))
+                   (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
+                           (regexp-quote (gptel-response-prefix-string)))))
+            prompts)
+      (and max-entries (cl-decf max-entries)))
+    (cons (list :role "system"
+                :content gptel--system-message)
+          prompts)))
 
 ;;;###autoload
@@ -106,7 +122,7 @@ Ollama models.")
     (name &key curl-args header key models stream
           (host "localhost:11434")
           (protocol "http")
-          (endpoint "/api/generate"))
+          (endpoint "/api/chat"))
   "Register an Ollama backend for gptel with NAME.
 
 Keyword arguments:
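A usage sketch (not part of the diff), assuming a local Ollama server;
"mistral:latest" stands in for whatever model you have pulled:

;; Registering a backend is unchanged, but the endpoint now defaults
;; to the chat API ("/api/chat" instead of "/api/generate").
(gptel-make-ollama "Ollama"
  :host "localhost:11434"
  :stream t
  :models '("mistral:latest"))

;; gptel--ollama-token-count is buffer-local; until the planned UI
;; affordance lands, inspect the running total from a chat buffer with
;; M-: gptel--ollama-token-count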