;;; deja.el --- Deja newsgroup article search for nnweb

;; Author: Lars Magne Ingebrigtsen
;; Maintainer: Stephen Tse
;; License: GNU Public License
;; Update: July 9, 1999
;; Where: http://www.sfu.ca/~stephent/emacs/deja.el
;; Keywords: news

;;; Commentary:
;;
;; nnweb is a nice piece of work, but is well-known to be broken for Deja
;; search.  I personally do Deja searches daily, got annoyed, and thus
;; started this random hack.  I separate it from the original nnweb.el file
;; because this file needs constant updates to catch up with changes of the
;; www.deja.com interface.  For the moment it works only in XEmacs.
;;
;;
;;
;; INSTALLATION
;;
;; Either manually replace functions in nnweb.el with those here, or
;;   (1) put (require 'deja) at the end of nnweb.el
;;   (2) put this file in elisp directory
;;
;; Use comma to separate search keywords.  You may also want
;;   (setq gnus-asynchronous t)
;;   (setq gnus-use-article-prefetch 5).
;;
;;
;;
;; DEVELOPER NOTES
;;
;; I put tag near regexp that Deja may change.  I use the non-greedy
;; quantifier *? available only in XEmacs.  FSF Emacs does not support it.
;; Using [^\001] instead of .* where appropriate can speed up matching.
;;
;; Any taker?
;;   * get message of multiple segment
;;   * display URL of article for reference
;;   * mule in article buffer

;;; Code:

;; automagically update web copy
;; (write-region (point-min) (point-max) "/stephent@ftp.sfu.ca:~/pub_html/emacs/deja.el")

(defvar nnweb-deja-max-hits 200
  "Deja counterpart of `nnweb-max-hits'.
`nnweb-dejanews-create-mapping' stops fetching further result pages
once it has seen at least this many hits.  The check is made between
pages, so the final count may exceed this value.")

(defun nnweb-dejanews-search (search)
  "Insert the Deja result page for the query SEARCH into the current buffer.
The request URL is the address from the nnweb definition followed by
the form-encoded query parameters.  Return the value of
`url-insert-file-contents'."
  (url-insert-file-contents
   (concat
    (nnweb-definition 'address)
    "?"
    ;; Query parameters understood by the Deja search form; these may
    ;; need updating whenever the Deja interface changes.
    (nnweb-encode-www-form-urlencoded
     `(("QRY" . ,search)
       ("defaultOp" . "AND")
       ("DBS" . "1")
       ("OP" . "dnquery.xp")
       ("ST" . "PS")
       ("LNG" . "ALL")
       ("subjects" . "")
       ("groups" . "")
       ("authors" . "")
       ("fromdate" . "")
       ("todate" . "")
       ("showsort" . "score")
       ("maxhits" . "100"))))))

(defun nnweb-dejanews-create-mapping ()
  "Perform the search and create a number-to-URL alist.
Work in `nnweb-buffer': run the search function from the nnweb
definition on `nnweb-search', then walk the result pages by following
the \"next message\" link.  Every hit whose URL is not yet in the nnweb
hash table gets the next article number and a mail header built from
its subject, newsgroup, author and date.  The new entries are merged
into `nnweb-articles', sorted by article number."
  (save-excursion
    (set-buffer nnweb-buffer)
    (erase-buffer)
    ;; insert search result
    (when (funcall (nnweb-definition 'search) nnweb-search)
      (let ((hit-count 0)
            (next-result t)
            (case-fold-search t)
            (active (or (cadr (assoc nnweb-group nnweb-group-alist))
                        (cons 1 0)))
            subject date newsgroup author map url)
        (while next-result
          ;; Find the link to the next result page, if any.  Only read
          ;; the match data when the search succeeded; otherwise
          ;; `match-string' would return stale data from an earlier
          ;; match.
          (goto-char (point-min))
          (setq next-result
                (and (re-search-forward
                      "href=\"\\([^\"]+\\)\">next message" nil t)
                     (match-string 1)))
          ;; start over, next result may come after article list
          (goto-char (point-min))
          (if (not (search-forward "author" nil t))
              ;; No article list on this page, hence no hits.  Stop
              ;; paging; looping again over the unchanged buffer would
              ;; never terminate.
              (setq next-result nil)
            ;; Reduce the buffer text from here on to the article list.
            ;; Deja-specific pattern; may need updating.
            ;; NOTE(review): this regexp looks as if markup was stripped
            ;; from it -- verify against a known-good copy.
            (when (re-search-forward ".*\\([^\001]*?\\)[^\001]*" nil t)
              (replace-match "\\1"))
            (goto-char (point-min))
            (nnweb-decode-entities)
            ;; go through all the article hits on this page.
            (goto-char (point-min))
            (while (re-search-forward
                    ;; divide into multiparts to make it easier to understand
                    ;; edebug does not support \ continuation
                    ;; Deja-specific pattern; may need updating.
                    (concat
                     ;; date
                     "\\(.+\\).*\n"
                     ;; url
                     ".*\n.*href=\"\\([^\"]+\\)\">\n"
                     ;; subject
                     "[ \t]*\\(.+\\)\n"
                     ;; newsgroup
                     ".*\n.*\n.*\\(.+\\).*\n"
                     ;; author
                     ".*size=2>\\(.+\\)")
                    nil t)
              (setq date (match-string 1)
                    url (match-string 2)
                    subject (match-string 3)
                    newsgroup (match-string 4)
                    author (match-string 5))
              (incf hit-count)
              ;; Only register URLs that have not been seen before.
              (unless (nnweb-get-hashtb url)
                (push (list (incf (cdr active))
                            (make-full-mail-header
                             (cdr active)
                             (format "%s(%s)" subject newsgroup)
                             author
                             date
                             (concat "<" (nnweb-identifier url) "@dejanews>")
                             nil 0 0 url))
                      map)
                (nnweb-set-hashtb (cadar map) (car map))))
            ;; Fetch the next page unless there is none or enough hits
            ;; have been collected.
            (if (or (not next-result)
                    (>= hit-count nnweb-deja-max-hits))
                (setq next-result nil)
              (erase-buffer)
              (url-insert-file-contents next-result))))
        ;; return the articles in the right order.
        (setq nnweb-articles
              (sort (nconc nnweb-articles map) 'car-less-than-car))))))

(defun nnweb-dejanews-wash-article ()
  "Turn the Deja article page in the current buffer into a plain article.
Search forward from point for \"subject\"; if it is absent the page is
treated as an invalid article and left untouched.  Otherwise delete
everything before it, rewrite the page into From, Subject and Date
headers followed by the body, convert <br> to newlines and strip the
remaining markup with `nnweb-remove-markup'."
  (let ((case-fold-search t))
    ;; otherwise invalid article
    (when (search-forward "subject" nil t)
      (delete-region (point-min) (point))
      ;; Only rewrite when the page matched; calling `replace-match'
      ;; after a failed search would act on stale match data.
      ;; NOTE(review): the replacement refers to group 5 (the body) but
      ;; the pattern as written defines only four groups -- it looks as
      ;; if markup was stripped from these regexps; verify against a
      ;; known-good copy.
      (when (re-search-forward
             ;; Deja-specific pattern; may need updating.
             (concat
              ;; subject
              ".*\\(.+\\)"
              ;; date
              "[^\001]*date:.*\\(.+\\)"
              ;; from, email
              "[^\001]*author:.*\\(.+\\).*href=\"mailto:\\([^\"]*\\)\""
              ;; body
              "[^\001]*\n[^\001]*")
             nil t)
        (replace-match
         "From: \\3 (\\4)\nSubject: \\1\nDate: \\2\n\n\\5"))
      (goto-char (point-min))
      ;; replace <br> before nnweb-remove-markup
      ;; NOTE(review): the search string had been reduced to a bare
      ;; newline, which made this loop a no-op; "<br>" is restored on
      ;; the evidence of this comment and the "\n" replacement --
      ;; confirm against a known-good copy.
      (while (search-forward "<br>" nil t)
        (replace-match "\n"))
      (goto-char (point-min))
      (nnweb-remove-markup))))

(provide 'deja)

;;; deja.el ends here