gptel-ollama: switch to chat API
* gptel-ollama.el (gptel-curl--parse-stream, gptel--parse-response, gptel--request-data, gptel--parse-buffer, gptel--ollama-context, gptel--ollama-token-count, gptel-make-ollama): Switch to Ollama's chat API from the completions API. This makes interacting with Ollama fully stateless, like all the other APIs, and should help significantly with issues like #249 and #279. Support non-streaming responses from Ollama in the process. Remove `gptel--ollama-context` as it is no longer needed. Add a `gptel--ollama-token-count` for tracking token costs. A UI affordance for this is not implemented yet, but is planned.
This commit is contained in:
parent
9b094b8b1e
commit
66a63e6c82
1 changed file with 56 additions and 40 deletions
|
@ -34,14 +34,16 @@
|
||||||
(:copier nil)
|
(:copier nil)
|
||||||
(:include gptel-backend)))
|
(:include gptel-backend)))
|
||||||
|
|
||||||
(defvar-local gptel--ollama-context nil
|
(defvar-local gptel--ollama-token-count 0
|
||||||
"Context for ollama conversations.
|
"Token count for ollama conversations.
|
||||||
|
|
||||||
This variable holds the context array for conversations with
|
This variable holds the total token count for conversations with
|
||||||
Ollama models.")
|
Ollama models.
|
||||||
|
|
||||||
|
Intended for internal use only.")
|
||||||
|
|
||||||
(cl-defmethod gptel-curl--parse-stream ((_backend gptel-ollama) info)
|
(cl-defmethod gptel-curl--parse-stream ((_backend gptel-ollama) info)
|
||||||
";TODO: "
|
"Parse response stream for the Ollama API."
|
||||||
(when (bobp)
|
(when (bobp)
|
||||||
(re-search-forward "^{")
|
(re-search-forward "^{")
|
||||||
(forward-line 0))
|
(forward-line 0))
|
||||||
|
@ -50,55 +52,69 @@ Ollama models.")
|
||||||
(while (setq content (gptel--json-read))
|
(while (setq content (gptel--json-read))
|
||||||
(setq pt (point))
|
(setq pt (point))
|
||||||
(let ((done (map-elt content :done))
|
(let ((done (map-elt content :done))
|
||||||
(response (map-elt content :response)))
|
(response (map-nested-elt content '(:message :content))))
|
||||||
(push response content-strs)
|
(push response content-strs)
|
||||||
(unless (eq done :json-false)
|
(unless (eq done :json-false)
|
||||||
(with-current-buffer (plist-get info :buffer)
|
(with-current-buffer (plist-get info :buffer)
|
||||||
(setq gptel--ollama-context (map-elt content :context)))
|
(cl-incf gptel--ollama-token-count
|
||||||
|
(+ (or (map-elt content :prompt_eval_count) 0)
|
||||||
|
(or (map-elt content :eval_count) 0))))
|
||||||
(goto-char (point-max)))))
|
(goto-char (point-max)))))
|
||||||
(error (goto-char pt)))
|
(error (goto-char pt)))
|
||||||
(apply #'concat (nreverse content-strs))))
|
(apply #'concat (nreverse content-strs))))
|
||||||
|
|
||||||
(cl-defmethod gptel--parse-response ((_backend gptel-ollama) response info)
|
(cl-defmethod gptel--parse-response ((_backend gptel-ollama) response info)
|
||||||
(when-let ((context (map-elt response :context)))
|
"Parse a one-shot RESPONSE from the Ollama API."
|
||||||
|
(when-let ((context
|
||||||
|
(+ (or (map-elt response :prompt_eval_count) 0)
|
||||||
|
(or (map-elt response :eval_count) 0))))
|
||||||
(with-current-buffer (plist-get info :buffer)
|
(with-current-buffer (plist-get info :buffer)
|
||||||
(setq gptel--ollama-context context)))
|
(cl-incf gptel--ollama-token-count context)))
|
||||||
(map-elt response :response))
|
(map-nested-elt response '(:message :content)))
|
||||||
|
|
||||||
(cl-defmethod gptel--request-data ((_backend gptel-ollama) prompts)
|
(cl-defmethod gptel--request-data ((_backend gptel-ollama) prompts)
|
||||||
"JSON encode PROMPTS for Ollama."
|
"JSON encode PROMPTS for sending to Ollama."
|
||||||
(let ((prompts-plist
|
(let ((prompts-plist
|
||||||
`(:model ,gptel-model
|
`(:model ,gptel-model
|
||||||
,@prompts
|
:messages [,@prompts]
|
||||||
:stream ,(or (and gptel-stream gptel-use-curl
|
:stream ,(or (and gptel-stream gptel-use-curl
|
||||||
(gptel-backend-stream gptel-backend))
|
(gptel-backend-stream gptel-backend))
|
||||||
:json-false))))
|
:json-false)))
|
||||||
(when gptel--ollama-context
|
options-plist)
|
||||||
(plist-put prompts-plist :context gptel--ollama-context))
|
(when gptel-temperature
|
||||||
|
(setq options-plist
|
||||||
|
(plist-put options-plist :temperature
|
||||||
|
gptel-temperature)))
|
||||||
|
(when gptel-max-tokens
|
||||||
|
(setq options-plist
|
||||||
|
(plist-put options-plist :num_predict
|
||||||
|
gptel-max-tokens)))
|
||||||
|
(when options-plist
|
||||||
|
(plist-put prompts-plist :options options-plist))
|
||||||
prompts-plist))
|
prompts-plist))
|
||||||
|
|
||||||
(cl-defmethod gptel--parse-buffer ((_backend gptel-ollama) &optional _max-entries)
|
(cl-defmethod gptel--parse-buffer ((_backend gptel-ollama) &optional max-entries)
|
||||||
(let ((prompts)
|
(let ((prompts) (prop))
|
||||||
(prop (text-property-search-backward
|
(while (and
|
||||||
|
(or (not max-entries) (>= max-entries 0))
|
||||||
|
(setq prop (text-property-search-backward
|
||||||
'gptel 'response
|
'gptel 'response
|
||||||
(when (get-char-property (max (point-min) (1- (point)))
|
(when (get-char-property (max (point-min) (1- (point)))
|
||||||
'gptel)
|
'gptel)
|
||||||
t))))
|
t))))
|
||||||
(if (and (prop-match-p prop)
|
(push (list :role (if (prop-match-value prop) "assistant" "user")
|
||||||
(prop-match-value prop))
|
:content
|
||||||
(user-error "No user prompt found!")
|
|
||||||
(setq prompts (list
|
|
||||||
:system gptel--system-message
|
|
||||||
:prompt
|
|
||||||
(if (prop-match-p prop)
|
|
||||||
(string-trim
|
(string-trim
|
||||||
(buffer-substring-no-properties (prop-match-beginning prop)
|
(buffer-substring-no-properties (prop-match-beginning prop)
|
||||||
(prop-match-end prop))
|
(prop-match-end prop))
|
||||||
(format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
|
(format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
|
||||||
(regexp-quote (gptel-prompt-prefix-string)))
|
(regexp-quote (gptel-prompt-prefix-string)))
|
||||||
(format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
|
(format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
|
||||||
(regexp-quote (gptel-response-prefix-string))))
|
(regexp-quote (gptel-response-prefix-string)))))
|
||||||
"")))
|
prompts)
|
||||||
|
(and max-entries (cl-decf max-entries)))
|
||||||
|
(cons (list :role "system"
|
||||||
|
:content gptel--system-message)
|
||||||
prompts)))
|
prompts)))
|
||||||
|
|
||||||
;;;###autoload
|
;;;###autoload
|
||||||
|
@ -106,7 +122,7 @@ Ollama models.")
|
||||||
(name &key curl-args header key models stream
|
(name &key curl-args header key models stream
|
||||||
(host "localhost:11434")
|
(host "localhost:11434")
|
||||||
(protocol "http")
|
(protocol "http")
|
||||||
(endpoint "/api/generate"))
|
(endpoint "/api/chat"))
|
||||||
"Register an Ollama backend for gptel with NAME.
|
"Register an Ollama backend for gptel with NAME.
|
||||||
|
|
||||||
Keyword arguments:
|
Keyword arguments:
|
||||||
|
|
Loading…
Add table
Reference in a new issue