gptel-ollama: switch to chat API

* gptel-ollama.el (gptel-curl--parse-stream,
gptel--parse-response, gptel--request-data, gptel--parse-buffer,
gptel--ollama-context, gptel--ollama-token-count,
gptel-make-ollama): Switch to Ollama's chat API from
the completions API.  This makes interacting with Ollama fully
stateless, like all the other APIs, and should help significantly
with issues like #249 and #279.  Support non-streaming responses
from Ollama in the process.

Remove `gptel--ollama-context` as it is no longer needed.

Add a `gptel--ollama-token-count` for tracking token costs. A UI
affordance for this is not implemented yet, but is planned.
This commit is contained in:
Karthik Chikmagalur 2024-04-22 12:21:49 -07:00
parent 9b094b8b1e
commit 66a63e6c82

View file

@ -34,14 +34,16 @@
(:copier nil) (:copier nil)
(:include gptel-backend))) (:include gptel-backend)))
(defvar-local gptel--ollama-token-count 0
  "Token count for ollama conversations.

This variable holds the total token count for conversations with
Ollama models.

Intended for internal use only.")
(cl-defmethod gptel-curl--parse-stream ((_backend gptel-ollama) info)
  "Parse response stream for the Ollama chat API.

INFO is a plist of request metadata; its :buffer key holds the
gptel chat buffer, whose running token count is updated from the
final stream message.  Return the accumulated message content as
a string."
  (when (bobp)
    ;; Skip past anything before the first JSON object in the stream.
    (re-search-forward "^{")
    (forward-line 0))
  (let* ((content-strs) (content) (pt (point)))
    (condition-case nil
        ;; Read JSON objects until the buffer runs out; on a partial
        ;; object, the error handler restores point so the next
        ;; network chunk can complete it.
        (while (setq content (gptel--json-read))
          (setq pt (point))
          (let ((done (map-elt content :done))
                (response (map-nested-elt content '(:message :content))))
            (push response content-strs)
            (unless (eq done :json-false)
              ;; Final message of the stream: record token usage.
              (with-current-buffer (plist-get info :buffer)
                (cl-incf gptel--ollama-token-count
                         (+ (or (map-elt content :prompt_eval_count) 0)
                            (or (map-elt content :eval_count) 0))))
              (goto-char (point-max)))))
      (error (goto-char pt)))
    (apply #'concat (nreverse content-strs))))
(cl-defmethod gptel--parse-response ((_backend gptel-ollama) response info)
  "Parse a one-shot RESPONSE from the Ollama API.

INFO is a plist of request metadata; its :buffer key holds the
gptel chat buffer, whose running token count is updated.  Return
the message content string."
  ;; NOTE(review): the sum below is always a number (never nil), so
  ;; this `when-let' never short-circuits; kept as-is to preserve
  ;; behavior -- consider a plain `let' or checking the keys exist.
  (when-let ((context
              (+ (or (map-elt response :prompt_eval_count) 0)
                 (or (map-elt response :eval_count) 0))))
    (with-current-buffer (plist-get info :buffer)
      (cl-incf gptel--ollama-token-count context)))
  (map-nested-elt response '(:message :content)))
(cl-defmethod gptel--request-data ((_backend gptel-ollama) prompts)
  "JSON encode PROMPTS for sending to Ollama."
  (let ((prompts-plist
         `(:model ,gptel-model
           :messages [,@prompts]
           :stream ,(or (and gptel-stream gptel-use-curl
                             (gptel-backend-stream gptel-backend))
                        :json-false)))
        options-plist)
    ;; Model parameters live under the :options key in Ollama's chat
    ;; API; only include the key when at least one option is set.
    (when gptel-temperature
      (setq options-plist
            (plist-put options-plist :temperature
                       gptel-temperature)))
    (when gptel-max-tokens
      (setq options-plist
            (plist-put options-plist :num_predict
                       gptel-max-tokens)))
    (when options-plist
      (plist-put prompts-plist :options options-plist))
    prompts-plist))
(cl-defmethod gptel--parse-buffer ((_backend gptel-ollama) &optional max-entries)
  "Parse the current buffer into a list of prompts for Ollama.

Scan backwards from point, collecting up to MAX-ENTRIES
alternating assistant/user messages (all of them if MAX-ENTRIES
is nil), and prepend the system message.  Returns a list of
plists with :role and :content keys, as expected by Ollama's chat
API."
  (let ((prompts) (prop))
    (while (and
            (or (not max-entries) (>= max-entries 0))
            (setq prop (text-property-search-backward
                        'gptel 'response
                        (when (get-char-property (max (point-min) (1- (point)))
                                                 'gptel)
                          t))))
      (push (list :role (if (prop-match-value prop) "assistant" "user")
                  :content
                  (string-trim
                   (buffer-substring-no-properties (prop-match-beginning prop)
                                                   (prop-match-end prop))
                   ;; Strip gptel's prompt/response prefix strings from
                   ;; each end of the message text.
                   (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
                           (regexp-quote (gptel-prompt-prefix-string)))
                   (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
                           (regexp-quote (gptel-response-prefix-string)))))
            prompts)
      (and max-entries (cl-decf max-entries)))
    (cons (list :role "system"
                :content gptel--system-message)
          prompts)))
;;;###autoload ;;;###autoload
(cl-defun gptel-make-ollama (cl-defun gptel-make-ollama
(name &key curl-args header key models stream (name &key curl-args header key models stream
(host "localhost:11434") (host "localhost:11434")
(protocol "http") (protocol "http")
(endpoint "/api/generate")) (endpoint "/api/chat"))
"Register an Ollama backend for gptel with NAME. "Register an Ollama backend for gptel with NAME.
Keyword arguments: Keyword arguments: