File: //usr/share/emacs/27.1/lisp/gnus/spam-stat.elc
;ELC   
;;; Compiled
;;; in Emacs version 27.1
;;; with all optimizations.
;;; This file uses dynamic docstrings, first added in Emacs 19.29.
;;; This file does not contain utf-8 non-ASCII characters,
;;; and so can be loaded in Emacs versions earlier than 23.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(byte-code "\300\301!\210\302\303\304\305\306\307\310\311&\210\312\313\314\315\316\317\310\303&\210\312\320\321\322\316\323\310\303&\210\312\324\325\326\316\327\310\303&\210\312\330\331\332\316\327\310\303&\210\312\333\334\335\316\336\310\303&\210\312\337\340\341\316\323\310\303&\210\312\342\304\343\316\344\310\303&\210\312\345\304\346\316\347\310\303&\210\312\350\351\352\316\323\310\303&\207" [require mail-parse custom-declare-group spam-stat nil "Statistical spam detection for Emacs.\nUse the functions to build a dictionary of words and their statistical\ndistribution in spam and non-spam mails.  Then use a function to determine\nwhether a buffer contains spam or not." :version "22.1" :group gnus custom-declare-variable spam-stat-file "~/.spam-stat.el" "File used to save and load the dictionary.\nSee `spam-stat-to-hash-table' for the format of the file." :type file spam-stat-unknown-word-score 0.2 "The score to use for unknown words.\nAlso used for words that don't appear often enough." number spam-stat-max-word-length 15 "Only words shorter than this will be considered." integer spam-stat-max-buffer-length 10240 "Only the beginning of buffers will be analyzed.\nThis variable says how many characters this will be." spam-stat-split-fancy-spam-group "mail.spam" "Name of the group where spam should be stored.\nIf `spam-stat-split-fancy' is used in fancy splitting rules.  Has\nno effect when spam-stat is invoked through spam.el." string spam-stat-split-fancy-spam-threshold 0.9 "Spam score threshold in spam-stat-split-fancy." spam-stat-washing-hook "Hook applied to each message before analysis." hook spam-stat-score-buffer-user-functions "List of additional scoring functions.\nCalled one by one on the buffer.\n\nIf all of these functions return non-nil answers, these numerical\nanswers are added to the computed spam stat score on the buffer.  If\nyou defun such functions, make sure they don't return the buffer in a\nnarrowed state or such: use, for example, `save-excursion'.  Each of\nyour functions is also passed the initial spam-stat score which might\naid in your scoring.\n\nAlso be careful when defining such functions.  If they take a long\ntime, they will slow down your mail splitting.  Thus, if the buffer is\nlarge, don't forget to use smaller regions, by wrapping your work in,\nsay, `with-spam-stat-max-buffer-size'." (repeat sexp) spam-stat-process-directory-age 90 "Max. age of files to be processed in directory, in days.\nWhen using `spam-stat-process-spam-directory' or\n`spam-stat-process-non-spam-directory', only files that have\nbeen touched in this many days will be considered.  Without\nthis filter, re-training spam-stat with several thousand messages\nwill start to take a very long time."] 8)
#@57 Time stamp of last change of spam-stat-file on this run
(defvar spam-stat-last-saved-at nil (#$ . 3171))
#@129 Syntax table used when processing mails for statistical analysis.
The important part is which characters are word constituents.
(defvar spam-stat-syntax-table (byte-code "\302!\303\304\305	#\210\303\306\305	#\210\303\307\305	#\210\303\310\305	#\210\303\311\305	#\210\303\312\305	#\210	)\207" [text-mode-syntax-table table copy-syntax-table modify-syntax-entry 45 "w" 95 46 33 63 43] 4) (#$ . 3283))
#@46 Whether the spam-stat database needs saving.
(defvar spam-stat-dirty nil (#$ . 3690))
#@78 Buffer to use for scoring while splitting.
This is set by hooking into Gnus.
(defvar spam-stat-buffer nil (#$ . 3782))
#@33 Name of the `spam-stat-buffer'.
(defvar spam-stat-buffer-name " *spam stat buffer*" (#$ . 3907))
#@42 Coding system used for `spam-stat-file'.
(defvar spam-stat-coding-system (byte-code "\300\301!\203 \301\207\302\207" [mm-coding-system-p emacs-mule raw-text] 2) (#$ . 4010))
#@59 Store a copy of the current buffer in `spam-stat-buffer'.
(defalias 'spam-stat-store-current-buffer #[nil "pr\303	!q\210\304 \210\305!\210p\211*\207" [buf spam-stat-buffer-name spam-stat-buffer get-buffer-create erase-buffer insert-buffer-substring] 2 (#$ . 4191)])
#@93 Store a copy of the current article in `spam-stat-buffer'.
This uses `gnus-article-buffer'.
(defalias 'spam-stat-store-gnus-article-buffer #[nil "rq\210\301 )\207" [gnus-original-article-buffer spam-stat-store-current-buffer] 1 (#$ . 4466)])
#@269 Hash table used to store the statistics.
Use `spam-stat-load' to load the file.
Every word is used as a key in this table.  The value is a vector.
Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good',
`spam-stat-bad', and `spam-stat-score' to access this vector.
(defvar spam-stat (make-hash-table :test 'equal) (#$ . 4716))
#@45 The number of good mails in the dictionary.
(defvar spam-stat-ngood 0 (#$ . 5052))
#@44 The number of bad mails in the dictionary.
(defvar spam-stat-nbad 0 (#$ . 5141))
#@59 A holder for condition-case errors while scoring buffers.
(defvar spam-stat-error-holder nil (#$ . 5228))
#@61 Return the number of times this word belongs to good mails.
(defalias 'spam-stat-good #[(entry) "\301H\207" [entry 0] 2 (#$ . 5340)])
(put 'spam-stat-good 'byte-optimizer 'byte-compile-inline-expand)
#@60 Return the number of times this word belongs to bad mails.
(defalias 'spam-stat-bad #[(entry) "\301H\207" [entry 1] 2 (#$ . 5547)])
(put 'spam-stat-bad 'byte-optimizer 'byte-compile-inline-expand)
#@29 Set the score of this word.
(defalias 'spam-stat-score #[(entry) "\203 \302H\207	\207" [entry spam-stat-unknown-word-score 2] 2 (#$ . 5751)])
(put 'spam-stat-score 'byte-optimizer 'byte-compile-inline-expand)
#@58 Set the number of times this word belongs to good mails.
(defalias 'spam-stat-set-good #[(entry value) "\302	I\207" [entry value 0] 3 (#$ . 5969)])
(put 'spam-stat-set-good 'byte-optimizer 'byte-compile-inline-expand)
#@57 Set the number of times this word belongs to bad mails.
(defalias 'spam-stat-set-bad #[(entry value) "\302	I\207" [entry value 1] 3 (#$ . 6194)])
(put 'spam-stat-set-bad 'byte-optimizer 'byte-compile-inline-expand)
#@29 Set the score of this word.
(defalias 'spam-stat-set-score #[(entry value) "\302	I\207" [entry value 2] 3 (#$ . 6416)])
(put 'spam-stat-set-score 'byte-optimizer 'byte-compile-inline-expand)
#@44 Return a vector with the given properties.
(defalias 'spam-stat-make-entry #[(good bad) "\304	\305#\211\306\n!\211\307I*\210\n)\207" [good bad entry value vector nil spam-stat-compute-score 2] 5 (#$ . 6614)])
(put 'spam-stat-make-entry 'byte-optimizer 'byte-compile-inline-expand)
#@50 Compute the score of this word.  1.0 means spam.
(defalias 'spam-stat-compute-score #[(entry) "\305\211\306H)_\307\211\310H)!	\n\\\311W\203 \312\202A \306U\203( \313\202A \f\306U\2032 \314\202A \315\316\n\f\245	\245\n\f\245\\\245^]*\207" [entry g b spam-stat-ngood spam-stat-nbad 2.0 0 float 1 5 0.2 0.99 0.01 0.01 0.99] 7 (#$ . 6906)])
#@72 Narrow the buffer down to the first 4k characters, then evaluate BODY.
(defalias 'with-spam-stat-max-buffer-size '(macro . #[(&rest body) "\301\302BB\207" [body save-restriction (when (> (- (point-max) (point-min)) spam-stat-max-buffer-length) (narrow-to-region (point-min) (+ (point-min) spam-stat-max-buffer-length)))] 3 (#$ . 7258)]))
#@71 Return a hash table of words and number of occurrences in the buffer.
(defalias 'spam-stat-buffer-words #[nil "\306\307!\210\214deZV\203 ee\\}\210\310 p\311\216\312!\210eb\210\313\314\315\"\316\211\317\320\316\321#\203Q \322\323!\324
\323#T
GW\203+ \325
\f#\210\202+ .\207" [spam-stat-max-buffer-length #1=#:buffer #2=#:table spam-stat-syntax-table count word run-hooks spam-stat-washing-hook syntax-table #[nil "rq\210\302	!)\207" [#1# #2# set-syntax-table] 2] set-syntax-table make-hash-table :test equal nil re-search-forward "\\w+" t match-string-no-properties 0 gethash puthash result spam-stat-max-word-length] 4 (#$ . 7603)])
#@48 Consider current buffer to be a new spam mail.
(defalias 'spam-stat-buffer-is-spam #[nil "T\302\303\304 \"\210\305\211\207" [spam-stat-nbad spam-stat-dirty maphash #[(word count) "\306	\"\211\203 \n\n\211\307H)\\\211\307\fI*\210\2028 \310\311
\312#\211\313\n!\211\314\fI*\210\n+\n\313\n!\211\314\fI*\210\315\n	#)\207" [word spam-stat entry count value bad gethash 1 0 vector nil spam-stat-compute-score 2 puthash good] 5] spam-stat-buffer-words t] 3 (#$ . 8264)])
#@52 Consider current buffer to be a new non-spam mail.
(defalias 'spam-stat-buffer-is-non-spam #[nil "T\302\303\304 \"\210\305\211\207" [spam-stat-ngood spam-stat-dirty maphash #[(word count) "\306	\"\211\203 \n\n\211\307H)\\\211\307\fI*\210\2028 \307
\310
\311#\211\312\n!\211\313\fI*\210\n+\n\312\n!\211\313\fI*\210\314\n	#)\207" [word spam-stat entry count value bad gethash 0 vector nil spam-stat-compute-score 2 puthash good] 5] spam-stat-buffer-words t] 3 (#$ . 8757)])
(autoload 'gnus-message "gnus-util")
#@57 Consider current buffer no longer normal mail but spam.
(defalias 'spam-stat-buffer-change-to-spam #[nil "T	S\303\304\305 \"\210\306\211\207" [spam-stat-nbad spam-stat-ngood spam-stat-dirty maphash #[(word count) "\305	\"\211\204 \306\307\310\"\202C \n\211\211\311H)Z\211\311\fI*\210\n\211\211\312H)\\\211\312\fI*\210\n\313\n!\211\314\fI*\210\315\n	#)\207" [word spam-stat entry count value gethash gnus-message 8 "This buffer has unknown words in it" 0 1 spam-stat-compute-score 2 puthash] 5] spam-stat-buffer-words t] 3 (#$ . 9294)])
#@57 Consider current buffer no longer spam but normal mail.
(defalias 'spam-stat-buffer-change-to-non-spam #[nil "S	T\303\304\305 \"\210\306\211\207" [spam-stat-nbad spam-stat-ngood spam-stat-dirty maphash #[(word count) "\305	\"\211\204 \306\307\310\"\202C \n\211\211\311H)\\\211\311\fI*\210\n\211\211\312H)Z\211\312\fI*\210\n\313\n!\211\314\fI*\210\315\n	#)\207" [word spam-stat entry count value gethash gnus-message 8 "This buffer has unknown words in it" 0 1 spam-stat-compute-score 2 puthash] 5] spam-stat-buffer-words t] 3 (#$ . 9854)])
#@92 Save the `spam-stat' hash table as lisp file.
With a prefix argument save unconditionally.
(defalias 'spam-stat-save #[(&optional force) "\204 	\205S \n\f\306\307!\310\216r
q\210p\311\312\n\"c\210\311\313#c\210\314\315\"\210\316c\210*r
q\210\317\320\211\320\321%\210-\322\323\f\"\210\320\324\f!\3258\262\211\207" [force spam-stat-dirty spam-stat-coding-system coding-system-for-write spam-stat-file #1=#:temp-buffer generate-new-buffer " *temp file*" #[nil "\301!\205	 \302!\207" [#1# buffer-name kill-buffer] 2] format ";-*- coding: %s; -*-\n" "(setq spam-stat-ngood %d spam-stat-nbad %d\nspam-stat (spam-stat-to-hash-table '(" maphash #[(word entry) "\302	\211\303H)	\211\304H)E!\207" [word entry prin1 0 1] 6] ")))" write-region nil 0 message "Saved %s." file-attributes 5 #2=#:temp-file standard-output spam-stat-ngood spam-stat-nbad spam-stat spam-stat-last-saved-at] 6 (#$ . 10418) "P"])
#@44 Read the `spam-stat' hash table from disk.
(defalias 'spam-stat-load #[nil "\n\203\f \305\306!\2029 \307\303!\203# \203# \310\f!\3118\262\232\2046 \312\f!\210\313\310\f!\3118\262\211\2029 \305\314!)\207" [spam-stat-coding-system coding-system-for-read spam-stat-dirty spam-stat-last-saved-at spam-stat-file message "Spam stat not loaded: spam-stat-dirty t" boundp file-attributes 5 load-file nil "Spam stat file not loaded: no change in disk."] 4 (#$ . 11344)])
#@370 Turn list ENTRIES into a hash table and store as `spam-stat'.
Every element in ENTRIES has the form (WORD GOOD BAD) where WORD is
the word string, NGOOD is the number of good mails it has appeared in,
NBAD is the number of bad mails it has appeared in, GOOD is the number
of times it appeared in good mails, and BAD is the number of times it
has appeared in bad mails.
(defalias 'spam-stat-to-hash-table #[(entries) "\302\303G\304\305$\306\307\"\210	)\207" [entries table make-hash-table :size :test equal mapc #[(l) "\306@A@\3078\310\n	\311#\211\312!\211\307\fI*\210+
#\207" [l bad good entry value table puthash 2 vector nil spam-stat-compute-score] 6]] 5 (#$ . 11823)])
#@76 Reset `spam-stat' to an empty hash-table.
This deletes all the statistics.
(defalias 'spam-stat-reset #[nil "\304\305\306\"\307\211\310\211\207" [spam-stat spam-stat-ngood spam-stat-nbad spam-stat-dirty make-hash-table :test equal 0 t] 4 (#$ . 12515) nil])
#@60 Raw data used in the last run of `spam-stat-score-buffer'.
(defvar spam-stat-score-data nil (#$ . 12782))
#@105 Return score for WORD.
The default score for unknown words is stored in
`spam-stat-unknown-word-score'.
(defalias 'spam-stat-score-word #[(word) "\304	\"\211\203 \n\305H\202 )\207" [word spam-stat entry spam-stat-unknown-word-score gethash 2] 4 (#$ . 12895)])
(put 'spam-stat-score-word 'byte-optimizer 'byte-compile-inline-expand)
#@245 Process current buffer, return the 15 most conspicuous words.
These are the words whose spam-stat differs the most from 0.5.
The list returned contains elements of the form (WORD SCORE DIFF),
where DIFF is the difference between SCORE and 0.5.
(defalias 'spam-stat-buffer-words-with-scores #[nil "\303\211\211\304\305\306 \"\210\307\n\310\"\311\n\233\303\241\210\n+\207" [score word result nil maphash #[(word ignore) "\306	\"\211\203 \n\307H\202 *\f\310\f\311Z!E
B\211\207" [word spam-stat entry spam-stat-unknown-word-score score result gethash 2 abs 0.5] 5] spam-stat-buffer-words sort #[(a b) "\3028\302	8W\207" [b a 2] 3] 14] 3 (#$ . 13239)])
#@110 Return a score describing the spam-probability for this buffer.
Add user supplied modifications if supplied.
(defalias 'spam-stat-score-buffer #[nil "\306 \307\310\"\311\312	\"\211\211\311\312\307\313	\"\"\\\245\3141$ \315!0\202&