gptel-ollama: switch to chat API
* gptel-ollama.el (gptel-curl--parse-stream, gptel--parse-response,
gptel--request-data, gptel--parse-buffer, gptel--ollama-context,
gptel--ollama-token-count, gptel-make-ollama): Switch from Ollama's
completions API to its chat API.  This makes interacting with Ollama
fully stateless, like all the other backends, and should help
significantly with issues like #249 and #279.  Support non-streaming
responses from Ollama in the process.

Remove `gptel--ollama-context`, as it is no longer needed.  Add
`gptel--ollama-token-count` to track token costs.  A UI affordance
for this count is not implemented yet, but is planned.
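For illustration (not part of the diff): a sketch of how the request
payload changes with this commit.  Field names follow the code below;
the model name, message text, and option values are placeholders.

;; Old completions API (POST /api/generate): one prompt string, with
;; multi-turn state threaded through a server-supplied :context array
;; that had to be cached per buffer in gptel--ollama-context.
'(:model "mistral:latest"
  :system "You are a helpful assistant."
  :prompt "Write a haiku about Emacs."
  :context [1 2 3]   ; opaque array from the previous response
  :stream t)

;; New chat API (POST /api/chat): the whole conversation is resent as
;; a :messages array on every request, so no client-side state is
;; needed, and the system message travels as a regular message.
'(:model "mistral:latest"
  :messages [(:role "system" :content "You are a helpful assistant.")
             (:role "user" :content "Write a haiku about Emacs.")]
  :stream t
  ;; gptel-temperature and gptel-max-tokens map to Ollama's :options:
  :options (:temperature 0.7 :num_predict 256))

;; The final chunk of a response (:done t) reports prompt_eval_count
;; and eval_count, which now accumulate into gptel--ollama-token-count.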
commit 66a63e6c82
parent 9b094b8b1e

1 file changed, 56 insertions(+), 40 deletions(-)
@@ -34,14 +34,16 @@
                 (:copier nil)
                 (:include gptel-backend)))
 
-(defvar-local gptel--ollama-context nil
-  "Context for ollama conversations.
-
-This variable holds the context array for conversations with
-Ollama models.")
+(defvar-local gptel--ollama-token-count 0
+  "Token count for ollama conversations.
+
+This variable holds the total token count for conversations with
+Ollama models.
+
+Intended for internal use only.")
 
 (cl-defmethod gptel-curl--parse-stream ((_backend gptel-ollama) info)
-  ";TODO: "
+  "Parse response stream for the Ollama API."
   (when (bobp)
     (re-search-forward "^{")
     (forward-line 0))
@@ -50,55 +52,69 @@ Ollama models.")
         (while (setq content (gptel--json-read))
           (setq pt (point))
           (let ((done (map-elt content :done))
-                (response (map-elt content :response)))
+                (response (map-nested-elt content '(:message :content))))
             (push response content-strs)
             (unless (eq done :json-false)
-              (with-current-buffer (plist-get info :buffer)
-                (setq gptel--ollama-context (map-elt content :context)))
+              (cl-incf gptel--ollama-token-count
+                       (+ (or (map-elt content :prompt_eval_count) 0)
+                          (or (map-elt content :eval_count) 0)))
               (goto-char (point-max)))))
       (error (goto-char pt)))
     (apply #'concat (nreverse content-strs))))
 
 (cl-defmethod gptel--parse-response ((_backend gptel-ollama) response info)
-  (when-let ((context (map-elt response :context)))
-    (with-current-buffer (plist-get info :buffer)
-      (setq gptel--ollama-context context)))
-  (map-elt response :response))
+  "Parse a one-shot RESPONSE from the Ollama API."
+  (when-let ((context
+              (+ (or (map-elt response :prompt_eval_count) 0)
+                 (or (map-elt response :eval_count) 0))))
+    (cl-incf gptel--ollama-token-count context))
+  (map-nested-elt response '(:message :content)))
 
 (cl-defmethod gptel--request-data ((_backend gptel-ollama) prompts)
-  "JSON encode PROMPTS for Ollama."
+  "JSON encode PROMPTS for sending to ChatGPT."
   (let ((prompts-plist
          `(:model ,gptel-model
-           ,@prompts
+           :messages [,@prompts]
            :stream ,(or (and gptel-stream gptel-use-curl
                          (gptel-backend-stream gptel-backend))
-                     :json-false))))
-    (when gptel--ollama-context
-      (plist-put prompts-plist :context gptel--ollama-context))
+                     :json-false)))
+        options-plist)
+    (when gptel-temperature
+      (setq options-plist
+            (plist-put options-plist :temperature
+                       gptel-temperature)))
+    (when gptel-max-tokens
+      (setq options-plist
+            (plist-put options-plist :num_predict
+                       gptel-max-tokens)))
+    (when options-plist
+      (plist-put prompts-plist :options options-plist))
     prompts-plist))
 
-(cl-defmethod gptel--parse-buffer ((_backend gptel-ollama) &optional _max-entries)
-  (let ((prompts)
-        (prop (text-property-search-backward
-               'gptel 'response
-               (when (get-char-property (max (point-min) (1- (point)))
-                                        'gptel)
-                 t))))
-    (if (and (prop-match-p prop)
-             (prop-match-value prop))
-        (user-error "No user prompt found!")
-      (setq prompts (list
-                     :system gptel--system-message
-                     :prompt
-                     (if (prop-match-p prop)
-                         (string-trim
-                          (buffer-substring-no-properties (prop-match-beginning prop)
-                                                          (prop-match-end prop))
-                          (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
-                                  (regexp-quote (gptel-prompt-prefix-string)))
-                          (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
-                                  (regexp-quote (gptel-response-prefix-string))))
-                       "")))
-      prompts)))
+(cl-defmethod gptel--parse-buffer ((_backend gptel-ollama) &optional max-entries)
+  (let ((prompts) (prop))
+    (while (and
+            (or (not max-entries) (>= max-entries 0))
+            (setq prop (text-property-search-backward
+                        'gptel 'response
+                        (when (get-char-property (max (point-min) (1- (point)))
+                                                 'gptel)
+                          t))))
+      (push (list :role (if (prop-match-value prop) "assistant" "user")
+                  :content
+                  (string-trim
+                   (buffer-substring-no-properties (prop-match-beginning prop)
+                                                   (prop-match-end prop))
+                   (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
+                           (regexp-quote (gptel-prompt-prefix-string)))
+                   (format "[\t\r\n ]*\\(?:%s\\)?[\t\r\n ]*"
+                           (regexp-quote (gptel-response-prefix-string)))))
+            prompts)
+      (and max-entries (cl-decf max-entries)))
+    (cons (list :role "system"
+                :content gptel--system-message)
+          prompts)))
 
 ;;;###autoload
@@ -106,7 +122,7 @@ Ollama models.")
     (name &key curl-args header key models stream
           (host "localhost:11434")
           (protocol "http")
-          (endpoint "/api/generate"))
+          (endpoint "/api/chat"))
   "Register an Ollama backend for gptel with NAME.
 
 Keyword arguments:
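A usage sketch (not part of the diff), assuming a local Ollama server;
"mistral:latest" stands in for whatever model you have pulled:

;; Registering a backend is unchanged, but the endpoint now defaults
;; to the chat API ("/api/chat" instead of "/api/generate").
(gptel-make-ollama "Ollama"
  :host "localhost:11434"
  :stream t
  :models '("mistral:latest"))

;; gptel--ollama-token-count is buffer-local; until the planned UI
;; affordance lands, inspect the running total from a chat buffer with
;; M-: gptel--ollama-token-count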