gptel-ollama: switch to chat API

* gptel-ollama.el (gptel-curl--parse-stream,
gptel--parse-response, gptel--request-data, gptel--parse-buffer,
gptel--ollama-context, gptel--ollama-token-count,
gptel-make-ollama): Switch to Ollama's chat API from
the completions API.  This makes interacting with Ollama fully
stateless, like all the other APIs, and should help significantly
with issues like #249 and #279.  Support non-streaming responses
from Ollama in the process.

Remove `gptel--ollama-context` as it is no longer needed.

Add `gptel--ollama-token-count` for tracking token counts. A UI
affordance for this is not implemented yet, but is planned.
This commit is contained in:
Karthik Chikmagalur 2024-04-22 12:21:49 -07:00
parent 9b094b8b1e
commit 66a63e6c82

View file

@ -34,14 +34,16 @@
(:copier nil)
(:include gptel-backend)))
(defvar-local gptel--ollama-context nil
"Context for ollama conversations.
(defvar-local gptel--ollama-token-count 0
"Token count for ollama conversations.
This variable holds the context array for conversations with
Ollama models.")
This variable holds the total token count for conversations with
Ollama models.
Intended for internal use only.")
(cl-defmethod gptel-curl--parse-stream ((_backend gptel-ollama) info)
";TODO: "
"Parse response stream for the Ollama API."
(when (bobp)
(re-search-forward "^{")
(forward-line 0))
@ -50,55 +52,69 @@ Ollama models.")
(while (setq content (gptel--json-read))
(setq pt (point))
(let ((done (map-elt content :done))
(response (map-elt content :response)))
(response (map-nested-elt content '(:message :content))))
(push response content-strs)
(unless (eq done :json-false)
(with-current-buffer (plist-get info :buffer)
(setq gptel--ollama-context (map-elt content :context)))
(cl-incf gptel--ollama-token-count
(+ (or (map-elt content :prompt_eval_count) 0)
(or (map-elt content :eval_count) 0))))
(goto-char (point-max)))))
(error (goto-char pt)))
(apply #'concat (nreverse content-strs))))
(cl-defmethod gptel--parse-response ((_backend gptel-ollama) response info)
(when-let ((context (map-elt response :context)))
"Parse a one-shot RESPONSE from the Ollama API."
(when-let ((context
(+ (or (map-elt response :prompt_eval_count) 0)
(or (map-elt response :eval_count) 0))))
(with-current-buffer (plist-get info :buffer)
(setq gptel--ollama-context context)))
(map-elt response :response))
(cl-incf gptel--ollama-token-count context)))
(map-nested-elt response '(:message :content)))
(cl-defmethod gptel--request-data ((_backend gptel-ollama) prompts)
"JSON encode PROMPTS for Ollama."
"JSON encode PROMPTS for sending to ChatGPT."
(let ((prompts-plist
`(:model ,gptel-model
,@prompts
:messages [,@prompts]
:stream ,(or (and gptel-stream gptel-use-curl
(gptel-backend-stream gptel-backend))
:json-false))))
(when gptel--ollama-context
(plist-put prompts-plist :context gptel--ollama-context))
:json-false)))
options-plist)
(when gptel-temperature
(setq options-plist
(plist-put options-plist :temperature
gptel-temperature)))
(when gptel-max-tokens
(setq options-plist
(plist-put options-plist :num_predict
gptel-max-tokens)))
(when options-plist
(plist-put prompts-plist :options options-plist))
prompts-plist))
(cl-defmethod gptel--parse-buffer ((_backend gptel-ollama) &optional _max-entries)
(let ((prompts)
(prop (text-property-search-backward
(cl-defmethod gptel--parse-buffer ((_backend gptel-ollama) &optional max-entries)
(let ((prompts) (prop))
(while (and
(or (not max-entries) (>= max-entries 0))
(setq prop (text-property-search-backward
'gptel 'response
(when (get-char-property (max (point-min) (1- (point)))
'gptel)
t))))
(if (and (prop-match-p prop)
(prop-match-value prop))
(user-error "No user prompt found!")
(setq prompts (list
:system gptel--system-message
:prompt
(if (prop-match-p prop)
(push (list :role (if (prop-match-value prop) "assistant" "user")
:content
(string-trim
(buffer-substring-no-properties (prop-match-beginning prop)
(prop-match-end prop))
(format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
(regexp-quote (gptel-prompt-prefix-string)))
(format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
(regexp-quote (gptel-response-prefix-string))))
"")))
(regexp-quote (gptel-response-prefix-string)))))
prompts)
(and max-entries (cl-decf max-entries)))
(cons (list :role "system"
:content gptel--system-message)
prompts)))
;;;###autoload
@ -106,7 +122,7 @@ Ollama models.")
(name &key curl-args header key models stream
(host "localhost:11434")
(protocol "http")
(endpoint "/api/generate"))
(endpoint "/api/chat"))
"Register an Ollama backend for gptel with NAME.
Keyword arguments: