gptel-ollama: switch to chat API

* gptel-ollama.el (gptel-curl--parse-stream,
gptel--parse-response, gptel--request-data, gptel--parse-buffer,
gptel--ollama-context, gptel--ollama-token-count,
gptel-make-ollama): Switch to Ollama's chat API from
the completions API.  This makes interacting with Ollama fully
stateless, like all the other APIs, and should help significantly
with issues like #249 and #279.  Support non-streaming responses
from Ollama in the process.

Remove `gptel--ollama-context` as it is no longer needed.

Add a `gptel--ollama-token-count` for tracking token costs. A UI
affordance for this is not implemented yet, but is planned.
This commit is contained in:
Karthik Chikmagalur 2024-04-22 12:21:49 -07:00
parent 9b094b8b1e
commit 66a63e6c82

View file

@ -34,14 +34,16 @@
(:copier nil) (:copier nil)
(:include gptel-backend))) (:include gptel-backend)))
(defvar-local gptel--ollama-token-count 0
  "Token count for ollama conversations.

This variable holds the total token count for conversations with
Ollama models.

Intended for internal use only.")
(cl-defmethod gptel-curl--parse-stream ((_backend gptel-ollama) info)
  "Parse response stream for the Ollama chat API.

INFO is a plist of request metadata; its :buffer key holds the
gptel chat buffer, whose running token count is updated from the
final stream message.  Return the accumulated message content as
a string."
  (when (bobp)
    ;; Skip past anything before the first JSON object in the stream.
    (re-search-forward "^{")
    (forward-line 0))
  (let* ((content-strs) (content) (pt (point)))
    (condition-case nil
        ;; Read JSON objects until the buffer runs out; on a partial
        ;; object, the error handler restores point so the next
        ;; network chunk can complete it.
        (while (setq content (gptel--json-read))
          (setq pt (point))
          (let ((done (map-elt content :done))
                (response (map-nested-elt content '(:message :content))))
            (push response content-strs)
            (unless (eq done :json-false)
              ;; Final message of the stream: record token usage.
              (with-current-buffer (plist-get info :buffer)
                (cl-incf gptel--ollama-token-count
                         (+ (or (map-elt content :prompt_eval_count) 0)
                            (or (map-elt content :eval_count) 0))))
              (goto-char (point-max)))))
      (error (goto-char pt)))
    (apply #'concat (nreverse content-strs))))
(cl-defmethod gptel--parse-response ((_backend gptel-ollama) response info)
  "Parse a one-shot RESPONSE from the Ollama API.

INFO is a plist of request metadata; its :buffer key holds the
gptel chat buffer, whose running token count is updated.  Return
the message content string."
  ;; NOTE(review): the sum below is always a number (never nil), so
  ;; this `when-let' never short-circuits; kept as-is to preserve
  ;; behavior -- consider a plain `let' or checking the keys exist.
  (when-let ((context
              (+ (or (map-elt response :prompt_eval_count) 0)
                 (or (map-elt response :eval_count) 0))))
    (with-current-buffer (plist-get info :buffer)
      (cl-incf gptel--ollama-token-count context)))
  (map-nested-elt response '(:message :content)))
(cl-defmethod gptel--request-data ((_backend gptel-ollama) prompts)
  "JSON encode PROMPTS for sending to Ollama."
  (let ((prompts-plist
         `(:model ,gptel-model
           :messages [,@prompts]
           :stream ,(or (and gptel-stream gptel-use-curl
                             (gptel-backend-stream gptel-backend))
                        :json-false)))
        options-plist)
    ;; Model parameters live under the :options key in Ollama's chat
    ;; API; only include the key when at least one option is set.
    (when gptel-temperature
      (setq options-plist
            (plist-put options-plist :temperature
                       gptel-temperature)))
    (when gptel-max-tokens
      (setq options-plist
            (plist-put options-plist :num_predict
                       gptel-max-tokens)))
    (when options-plist
      (plist-put prompts-plist :options options-plist))
    prompts-plist))
(cl-defmethod gptel--parse-buffer ((_backend gptel-ollama) &optional max-entries)
  "Parse the current buffer into a list of prompts for Ollama.

Scan backwards from point, collecting up to MAX-ENTRIES
alternating assistant/user messages (all of them if MAX-ENTRIES
is nil), and prepend the system message.  Returns a list of
plists with :role and :content keys, as expected by Ollama's chat
API."
  (let ((prompts) (prop))
    (while (and
            (or (not max-entries) (>= max-entries 0))
            (setq prop (text-property-search-backward
                        'gptel 'response
                        (when (get-char-property (max (point-min) (1- (point)))
                                                 'gptel)
                          t))))
      (push (list :role (if (prop-match-value prop) "assistant" "user")
                  :content
                  (string-trim
                   (buffer-substring-no-properties (prop-match-beginning prop)
                                                   (prop-match-end prop))
                   ;; Strip gptel's prompt/response prefix strings from
                   ;; each end of the message text.
                   (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
                           (regexp-quote (gptel-prompt-prefix-string)))
                   (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
                           (regexp-quote (gptel-response-prefix-string)))))
            prompts)
      (and max-entries (cl-decf max-entries)))
    (cons (list :role "system"
                :content gptel--system-message)
          prompts)))
;;;###autoload ;;;###autoload
(cl-defun gptel-make-ollama (cl-defun gptel-make-ollama
(name &key curl-args header key models stream (name &key curl-args header key models stream
(host "localhost:11434") (host "localhost:11434")
(protocol "http") (protocol "http")
(endpoint "/api/generate")) (endpoint "/api/chat"))
"Register an Ollama backend for gptel with NAME. "Register an Ollama backend for gptel with NAME.
Keyword arguments: Keyword arguments: