gptel: Improve markdown converter

* gptel.el (gptel--stream-convert-markdown->org,
gptel--replace-source-marker): Handle backquote conversion much
better during stream-based conversion.  One-shot conversions still
require fixing.
This commit is contained in:
Karthik Chikmagalur 2024-03-13 00:48:22 -07:00
parent d502ad8ecb
commit 9925dc91b4

View file

@ -1325,6 +1325,22 @@ elements."
(delete-char -1) (insert "-")))))) (delete-char -1) (insert "-"))))))
(buffer-string))) (buffer-string)))
(defun gptel--replace-source-marker (num-ticks &optional end)
  "Swap a run of markdown backticks for the corresponding Org markup.

NUM-TICKS is the length of the backtick run.  A non-nil END marks
the run as a closing delimiter rather than an opening one.

The text from the start of the most recent match (as recorded in
the match data) up to point is deleted.  If NUM-TICKS is 3 and
only spaces or tabs precede the run on its line, a source block
delimiter is inserted: \"#+end_src\" when END is non-nil,
\"#+begin_src \" otherwise.  In every other case a single \"=\"
is inserted.

This is a helper for the markdown to org stream converter."
  (let ((run-start (match-beginning 0)))
    ;; Drop the backticks first; point then sits at RUN-START.
    (delete-region run-start (point))
    (insert
     (cond
      ;; Anything other than a triple backtick is inline verbatim.
      ((/= num-ticks 3) "=")
      ;; A triple backtick that is not the first non-blank text on its
      ;; line is also treated as inline verbatim.
      ((save-excursion
         (beginning-of-line)
         (skip-chars-forward " \t")
         (not (eq (point) run-start)))
       "=")
      ;; A fence at the start of a line: closing or opening delimiter.
      (end "#+end_src")
      (t "#+begin_src ")))))
(defun gptel--stream-convert-markdown->org () (defun gptel--stream-convert-markdown->org ()
"Return a Markdown to Org converter. "Return a Markdown to Org converter.
@ -1334,6 +1350,7 @@ text stream."
(letrec ((in-src-block nil) ;explicit nil to address BUG #183 (letrec ((in-src-block nil) ;explicit nil to address BUG #183
(temp-buf (generate-new-buffer-name "*gptel-temp*")) (temp-buf (generate-new-buffer-name "*gptel-temp*"))
(start-pt (make-marker)) (start-pt (make-marker))
(ticks-total 0)
(cleanup-fn (cleanup-fn
(lambda (&rest _) (lambda (&rest _)
(when (buffer-live-p (get-buffer temp-buf)) (when (buffer-live-p (get-buffer temp-buf))
@ -1342,29 +1359,45 @@ text stream."
(remove-hook 'gptel-post-response-functions cleanup-fn)))) (remove-hook 'gptel-post-response-functions cleanup-fn))))
(add-hook 'gptel-post-response-functions cleanup-fn) (add-hook 'gptel-post-response-functions cleanup-fn)
(lambda (str) (lambda (str)
(let ((noop-p)) (let ((noop-p) (ticks 0))
(with-current-buffer (get-buffer-create temp-buf) (with-current-buffer (get-buffer-create temp-buf)
(save-excursion (goto-char (point-max)) (save-excursion (goto-char (point-max)) (insert str))
(insert str))
(when (marker-position start-pt) (goto-char start-pt)) (when (marker-position start-pt) (goto-char start-pt))
(when in-src-block (setq ticks ticks-total))
(save-excursion (save-excursion
(while (re-search-forward "`\\|\\*\\{1,2\\}\\|_" nil t) (while (re-search-forward "`\\|\\*\\{1,2\\}\\|_" nil t)
(pcase (match-string 0) (pcase (match-string 0)
("`" ("`"
;; Count number of consecutive backticks
(backward-char)
(while (and (char-after) (eq (char-after) ?`))
(forward-char)
(if in-src-block (cl-decf ticks) (cl-incf ticks)))
;; Set the verbatim state of the parser
(if (and (eobp)
;; Special case heuristic: If the response ends with
;; ^``` we don't wait for more input.
;; FIXME: This can have false positives.
(not (save-excursion (beginning-of-line)
(looking-at "^```$"))))
;; End of input => there could be more backticks coming,
;; so we wait for more input
(progn (setq noop-p t) (set-marker start-pt (match-beginning 0)))
;; We reached a character other than a backtick
(cond (cond
((looking-at "``") ;; Ticks balanced, end src block
(backward-char 1) ((= ticks 0)
(delete-char 3) (progn (setq in-src-block nil)
(if in-src-block (gptel--replace-source-marker ticks-total 'end)))
(progn (insert "#+end_src") ;; Positive number of ticks, start an src block
(setq in-src-block nil)) ((and (> ticks 0) (not in-src-block))
(insert "#+begin_src ") (setq ticks-total ticks
(setq in-src-block t))) in-src-block t)
((looking-at "`\\|$") (gptel--replace-source-marker ticks-total))
(setq noop-p t) ;; Negative number of ticks or in a src block already,
(set-marker start-pt (1- (point))) ;; reset ticks
(unless (eobp) (forward-char 1))) (t (setq ticks ticks-total)))))
((not in-src-block) (replace-match "=")))) ;; Handle other chars: emphasis, bold and bullet items
((and "**" (guard (not in-src-block))) ((and "**" (guard (not in-src-block)))
(cond (cond
((looking-at "\\*\\(?:[[:word:]]\\|\s\\)") ((looking-at "\\*\\(?:[[:word:]]\\|\s\\)")