;;; deja.el --- Deja newsgroup article search for nnweb

;; Author: Lars Magne Ingebrigtsen
;; Maintainer: Stephen Tse
;; License: GNU Public License
;; Update: Feb 29, 2000
;; Where: http://www.sfu.ca/~stephent/emacs/deja.el
;; Keywords: news

;;; Commentary:
;;
;; nnweb is a nice piece of work, but is well known to be broken for
;; Deja search.  I personally do Deja searches daily and thus started
;; this random hack.  I keep it separate from the original nnweb.el
;; because this file needs constant updates to catch up with changes
;; to the www.deja.com interface.
;;
;;
;;; Installation:
;;
;; Put this file deja.el in a load directory and add this to your .emacs:
;;    (add-to-list 'after-load-alist '("nnweb" (require 'deja))).
;;
;; Use commas to separate search keywords.  You may also want
;;    (setq gnus-asynchronous t)
;;    (setq gnus-use-article-prefetch 5).
;;
;; NOTE(review): this copy of the file was recovered from a rendering
;; that stripped HTML-tag-like text out of string literals.  Patterns
;; marked "reconstructed" below were rebuilt from the surrounding
;; comments and paren structure and must be confirmed against the
;; real page markup.

;;; Code:

;; automagically update my web copy
;; (write-region (point-min) (point-max) "/stephent@ftp.sfu.ca:~/pub_html/emacs/deja.el")

(defvar nnweb-deja-max-hits 200
  "`nnweb-max-hits' for deja.
`nnweb-dejanews-create-mapping' stops following \"next\" result pages
once it has seen at least this many hits.")

(defvar nnweb-dejanews-language "ALL"
  "Article language filter.
Check www.deja.com for possible values.  On occasions \"english\"
should give you a peace of mind.")

(defun nnweb-dejanews-search (search)
  "Insert the first Deja result page for SEARCH into the current buffer.
The query is sent as a GET request: the form fields below are
URL-encoded and appended to the address given by `nnweb-definition'.
Return the value of `url-insert-file-contents', which
`nnweb-dejanews-create-mapping' treats as the success flag."
  (url-insert-file-contents
   (concat
    (nnweb-definition 'address)
    "?"
    (nnweb-encode-www-form-urlencoded
     `(("QRY" . ,search)
       ("defaultOp" . "AND")
       ("DBS" . "1")
       ("OP" . "dnquery.xp")
       ("ST" . "PS")
       ("LNG" . ,nnweb-dejanews-language)
       ("subjects" . "")
       ("groups" . "")
       ("authors" . "")
       ("fromdate" . "")
       ("todate" . "")
       ("showsort" . "score")
       ("maxhits" . "100"))))))

(defun nnweb-dejanews-create-mapping ()
  "Perform the search and create a number-to-url alist.
Run the search function from `nnweb-definition' on `nnweb-search' in
`nnweb-buffer', parse each result page, and follow the \"next message\"
link until there is none or `nnweb-deja-max-hits' hits have been seen.
New articles are merged into `nnweb-articles', sorted by article number."
  (save-excursion
    (set-buffer nnweb-buffer)
    (erase-buffer)
    ;; insert search result
    (when (funcall (nnweb-definition 'search) nnweb-search)
      (let ((hit-count 0)
            (next-result t)
            (case-fold-search t)
            (active (or (cadr (assoc nnweb-group nnweb-group-alist))
                        (cons 1 0)))
            subject date newsgroup author
            map url)
        (while next-result
          (goto-char (point-min))
          ;; next result: URL of the following page, or nil on the last one
          (setq next-result
                (and (re-search-forward
                      "href=\"\\([^\"]+\\)\">next message" nil t)
                     (match-string 1)))
          ;; start over, next result may come after article list
          (goto-char (point-min))
          ;; now the list comes after author column label
          (if (not (search-forward "author" nil t))
              ;; No article list on this page.  Stop paging: the buffer
              ;; would never change, so keeping NEXT-RESULT set would
              ;; loop forever.
              (setq next-result nil)
            ;; article-list
            ;; clean up, leaving only article list
            (delete-region (point-min) (point))
            ;; Reconstructed: the end-of-list pattern was lost (an empty
            ;; string in the damaged source).  "</table>" is an
            ;; assumption -- TODO confirm.  If it is not found, keep the
            ;; rest of the page rather than signalling an error.
            (let ((list-end (re-search-forward "</table>" nil t)))
              (when list-end
                (delete-region list-end (point-max))))
            (goto-char (point-min))
            (nnweb-decode-entities)
            ;; go through all the article hits on this page.
            (goto-char (point-min))
            (while (re-search-forward
                    ;; divide into multiparts to make it easier to understand
                    ;; edebug does not support \ continuation
                    ;;
                    ;; NOTE(review): the cell markup around these groups
                    ;; was probably stripped as well.  Only the URL group
                    ;; is reconstructed, because five groups are read
                    ;; below; its form mirrors the "next message" link
                    ;; pattern above -- TODO confirm.
                    (concat
                     "\\(.+\\)\n"                         ; date
                     ".*\n"                               ; junk line
                     "[ \t]*<a href=\"\\([^\"]+\\)\">\n"  ; url
                     "[ \t]*\\(.+\\)\n"                   ; subject
                     ".*\n"                               ; junk line
                     ".*\n"                               ; junk line
                     "[ \t]*\\(.+\\)\n"                   ; newsgroup
                     ".*\n"                               ; junk line
                     "[ \t]*\\(.*\\)\n")                  ; author
                    nil t)
              (setq date (match-string 1)
                    url (match-string 2)
                    subject (match-string 3)
                    newsgroup (match-string 4)
                    author (match-string 5))
              (setq hit-count (1+ hit-count))
              ;; Only register articles not already known by URL.
              (unless (nnweb-get-hashtb url)
                ;; Allocate the next article number by bumping the cdr
                ;; of ACTIVE in place.
                (let* ((number (setcdr active (1+ (cdr active))))
                       (header (make-full-mail-header
                                number
                                (format "%s(%s)" subject newsgroup)
                                author date
                                (concat "<" (nnweb-identifier url)
                                        "@dejanews>")
                                nil 0 0 url))
                       (entry (list number header)))
                  (push entry map)
                  (nnweb-set-hashtb header entry))))
            ;; Fetch the next page unless we are done or have enough hits.
            (if (or (not next-result)
                    (>= hit-count nnweb-deja-max-hits))
                (setq next-result nil)
              (erase-buffer)
              (url-insert-file-contents next-result))))
        ;; return the articles in the right order.
        (setq nnweb-articles
              (sort (nconc nnweb-articles map) 'car-less-than-car))))))

(defun nnweb-dejanews-wash-article ()
  "Turn the Deja article page in the current buffer into a plain article.
Extract subject, date, author, email and body from the page, then
replace the buffer contents with From/Subject/Date headers, a blank
line and the body with markup removed.  A field that cannot be found
is reported with `message' and left empty."
  ;; current article in `nnweb-buffer'
  ;; (switch-to-buffer nnweb-buffer)
  ;; case-insensitive search
  (let ((case-fold-search t)
        (subject "")
        (date "")
        (author "")
        (email "")
        (body ""))
    (goto-char (point-min))
    ;; NOTE(review): in the three patterns below a greedy ".*" directly
    ;; precedes the capture group, so the group can only match the last
    ;; character of the line.  Delimiting markup was presumably lost
    ;; from these strings; they are left untouched -- TODO restore from
    ;; the real page markup.
    ;;
    ;; subject
    (if (re-search-forward "subject:.*\\(.+\\)" nil t)
        (setq subject (match-string 1))
      (message "Cannot find SUBJECT in deja article."))
    ;; date
    (if (re-search-forward "date:.*\\(.+\\)" nil t)
        (setq date (match-string 1))
      (message "Cannot find DATE in deja article."))
    ;; author
    (if (re-search-forward "author:.*\n.*\\(.+\\)" nil t)
        (setq author (match-string 1))
      (message "Cannot find AUTHOR in deja article."))
    ;; email
    (if (re-search-forward "mailto:\\([^\"]*\\)\"" nil t)
        (setq email (match-string 1))
      (message "Cannot find EMAIL in deja article."))
    ;; body - tricky: hard to define where body region is
    ;; for now, it's the first table tag after author
    ;; Reconstructed: both delimiters and the assignment were lost from
    ;; the damaged source; rebuilt from the comment above and the
    ;; remaining paren structure -- TODO confirm the exact patterns.
    (if (re-search-forward "<table[^>]*>" nil t)
        (setq body (buffer-substring
                    (point)
                    (progn (re-search-forward "</table>" nil t)
                           (point))))
      (message "Cannot find BODY in deja article."))
    ;; actual cleaning
    (erase-buffer)
    ;; One header per line, then a blank line before the body.
    (insert (format "Subject: %s\nDate: %s\n\n%s" subject date body))
    ;; replace <br> before nnweb-remove-markup
    (goto-char (point-min))
    (while (search-forward "<br>" nil t)
      (replace-match "\n"))
    (goto-char (point-min))
    (nnweb-remove-markup)
    ;; to avoid conflicts between angle brackets and html syntax, insert
    ;; author and email after `nnweb-remove-markup'
    (goto-char (point-min))
    (insert (format "From: %s <%s>\n" author email))))

(provide 'deja)

;;; deja.el ends here