;;; -*- Mode:LISP; Package:SPELL; Base:10; Lowercase:T; Readtable:ZL -*- ;;; Copyright LISP Machine, Inc. 1985 See filename "Copyright.Text" for ;;; licensing and release information. ;;; (C) Enhancements Copyright GigaMos Systems Inc., 1987 #| AUDIT TRIAL: - Pace wrote this lisp code using his C coded version as a guide, which was in turn inspired by, and with heuristics lifted from, the PDP-10 ITS version written in MIDAS. - My only change was to take the I from ISPELL. 1/17/85 14:34:00 -gjc - Various mods. 1/26/86 -KmC - Fixed various array subscript violations (in SPELL-...-ENDING's) - Defined *enough-words*, max number of variants to return - Added SPELL-CHECK - alternate interface - Added ADD-TO-DICTIONARY, load new words into dictionary table - Added CHECK-WORDS, run spelling checker against text file - Added COMPLAIN, to let users log complaints - Fixes for 3.0, add auxiliary and private dictionary loads June-Aug '87 -KmC DOCUMENTATION: This is a brute-force spelling checker which seeks to find the root of an input word, as permuted by various methods, in a hash-table dictionary. Note that Zmacs has an interface for a spelling checker. The ZMacs interface thinks it's calling the Chaosnet ISPELL server; it all works by defining the function ZWEI:SPELL-WORD as a synonym for the SPELL-WORD defined below. There are two ways to use SPELL from ZMacs: 1. Press Meta-$, which spell-checks the word near the cursor 2. Press Meta-Roman-1, which spell-checks the region or whole buffer Input to the main interface routines consists of a single word to be checked. (See SPELL-WORD, SPELL-CHECK, WORD-OK-P, WORD-IN-DICTIONARY-P below.) Single-error variations *only* are checked -- that is, a check on the spelling of "fartran" or "fortron" will uncover "fortran", but "fartron" -- which contains two seperate misplaced letters -- will appear to be in error. On the other hand, "fortrna" will win because two-letter transpositions are checked. The permutations generated, in order, are: 1] Wrong letter -- "funni" => "funny" 2] Extra letter -- "funnny" => "funny" 3] Missing letter -- "fnny" => "funny" 4] Transposed letters -- "funyn" => "funny" Possibilities are generated by these methods and then checked, up to the limit set by *enough-words* (defaults to 10). The format of each line in the dictionary text file is //..., where the is in "root" form and the (optional) single-character flag(s) indicate allowable variant forms of the . For example, the entry REOPEN/D/G/S defines the allowed words "REOPEN", "REOPENED", and "REOPENING". FLAG ENDING(S) ROOT/VARIANTS ---- --------- ------------- V -IVE instinct/instinctive, execute/executive N -EN, -ION, -ICATION oak/oaken, notify/notification X -ENS, -IONS, -ICATIONS notify/notifications H -TH, -IETH ten/tenth, forty/fortieth Y -LY final/finally, king/kingly G -ING create/creating, eat/eating J -INGS tide/tidings, bind/bindings D -ED, -IED dread/dreaded, notify/notified T -EST, -IEST grey/greyest, stuffy/stuffiest R -ER, -IER fight/fighter, mighty/mightier Z -ERS, -IERS fight/fighters, joke/jokers S -IES, -ES, -S tent/tents, pantry/pantries P -INESS, -NESS happy/happiness, good/goodness M -'S person/person's (flag isn't required!) FILES: The file(s) required for SPELL-CHECK should be in 'SYS:ZWEI;' or you should redefine the constants below. The files are: SPELL-DICTIONARY.TEXT -- the main dictionary text file; the format is described above SPELL-DICTIONARY2.TEXT -- the auxiliary dictionary text file; same format; just provided as a convenient place to put new, system-wide words SPELL-COMPLAINTS.TEXT -- where user complaints should go, for the system administrator to deal with NOTE: If you move this between Lambda releases 2 and 3, recompile. Release 2, with no GC, needs call to (SI:RESET-TEMPORARY-AREA). |# ;;; Most important customizable parameters ;;;Location of file(s) containing system word list(s): (defconst *dictionary-source-file* "sys:zwei;spell-dictionary.text") (defconst *dictionary-auxiliary-source-file* "sys:zwei;spell-dictionary2.text") ;;;Location of file containing user complaint list: (defvar *complaint-log-file* "sys:site;spell-complaints.text") ;;;Limit on number of variant words to find and return: (defconst *enough-words* 10. "Limit on number of variants to return.") ;;; This code assumes that functions like STRING-EQUAL ignore case on ;;; string comparisons. Here's a chance that this hasn't been defeated. (add-initialization "Set string comparison flag" (setq-globally alphabetic-case-affects-string-comparison nil) '(once)) (defun vowelp (c) (or (char-equal c #/A) (char-equal c #/E) (char-equal c #/I) (char-equal c #/O) (char-equal c #/U))) ;;; Define the character set -- what's in a word? ;;; Make lists of valid/invalid test word characters -- for use when a ;;; word is input and when spell-processing files. (defvar *ok-characters* (loop with ok = nil for i from 1 to 256 with max = 0 when ok collect (1- i) do (progn (when (= i #/A) (setf ok t) (setf max (+ i 26))) (when (= i #/a) (setf ok t) (setf max (+ i 26))) (when (= i #/') (setf ok t) (setf max (+ i 1))) (when (= i max) (setf ok nil)) )) "List of ok characters (allowed in a spellable word)") (defvar *ng-characters* (loop with ok = nil for i from 1 to 256 with max = 0 when (not ok) collect (1- i) do (progn (when (= i #/A) (setf ok t) (setf max (+ i 26))) (when (= i #/a) (setf ok t) (setf max (+ i 26))) (when (= i #/') (setf ok t) (setf max (+ i 1))) (when (= i max) (setf ok nil)) )) "List of invalid characters (not allowed in a spellable word)") ;;; Here we define the dictionary structure and components ;;;The dictionary is a hash table, an art-q array; each entry is 3 Qs. ;;; 0 flags ... fixnum ;;; 1 word ... string-pointer ;;; 2 next ... chain pointer - address of next entry, or -1 for end (defvar *dictionary* nil) (defconst *qs-per-entry* 3 "Number of sub-elements per dictionary entry") (defmacro de-word (n) `(aref *dictionary* (1+ ,n))) (defmacro de-next (n) `(aref *dictionary* (+ ,n 2))) (defmacro de-flags (n) `(aref *dictionary* ,n)) ;;;Flag indicators: (eval-when (eval compile load) (defconst *all-flags* '(#/V #/N #/X #/H #/Y #/G #/J #/D #/T #/R #/Z #/S #/P #/M))) (defmacro de-used (n) `(ldb (byte 1 ,(length *all-flags*)) (aref *dictionary* ,n))) (defmacro make-flag-accessors () `(progn 'compile ,@(loop for x in *all-flags* for i from 0 by 1 collect `(defmacro ,(intern (string-append "DE-" x "-FLAG")) (n) `(ldb (byte 1 ,,i) (aref *dictionary* ,n)))))) (make-flag-accessors) (defun set-flag (n flag &optional (value 1)) (setq flag (int-char flag)) (let ((pos (find-position-in-list flag *all-flags*))) (if (null pos) (ferror nil "bad flag ~s" flag)) (setf (de-flags n) (dpb value (byte 1 pos) (de-flags n))))) ;;; Here we build up a list of words to go into dictionary ;;; Spell dictionary memory area (defvar spell-area) (defvar *word-list* nil) (defvar *number-of-words* 0 "Number of words being loaded") (defun make-word-list (&optional (source-file *dictionary-source-file*) &aux error-flag) (when (not (boundp 'spell-area)) (format t "~&Creating working storage area...~%") (setq *dictionary* nil) (setq spell-area (make-area :name 'spell-area :region-size #o1400000 :room t))) (setq *word-list* nil) (setq *number-of-words* 0) (setq source-file (fs:merge-pathname-defaults source-file :type "TEXT")) (format t "~&Opening ~s...~%" source-file) (with-open-file (in source-file) (format t "~&Making word list...~%") (setq error-flag (nth-value 1 (catch-error ;Must not crash (let ((default-cons-area spell-area)) ;while consing here (do ((word (send in :line-in) (send in :line-in)) (val nil nil) first-flag) ((or (null word) (= (string-length word) 0))) (cond ((= #/; (aref word 0))) ;Allow comments ('else (incf *number-of-words*) (when (setq first-flag (string-search #// word)) (do ((i first-flag (+ i 2)) (end (string-length word))) ((>= i end)) (push (int-char (aref word (1+ i))) val))) (push (substring word 0 first-flag spell-area) val) (push val *word-list*))))) )))) (cond (error-flag (format t "!!! Grievious error while making word list.~%") nil) (t *number-of-words*))) ;;; Stuff for manipulating the dictionary (defvar *dict-size* 0 "The calculated array size of the dictionary.") (defvar *hash-size* 0 "The calculated hashing space of the dictionary.") #+explorer (defun slow-spell-hash (s &optional (start 0) (end (string-length s))) (without-interrupts (let ((str (substring s start end)) h) (setq h (mod (sxhash str) *hash-size*)) (return-array (prog1 str (setq str nil))) h))) #-explorer (defun spell-hash (s &optional (start 0) (end (string-length s))) (mod (sxhash (substring s start end)) *hash-size*)) (defun install-word (word-number word-info) (setf (de-word word-number) (car word-info)) (setf (de-flags word-number) 0) (dolist (f (cdr word-info)) (set-flag word-number f)) (setf (de-used word-number) 1) (setf (de-next word-number) -1)) (defun dictionary-good-size (size) ;;; Convert SIZE (a number of array elements) to a more-or-less prime. (unless (oddp size) (incf size)) (do () ((and (not (zerop (\ size 3))) (not (zerop (\ size 5))) (not (zerop (\ size 7))))) (incf size 2)) size) ;;; Functions to build the dictionary (defvar *rehash-list* nil) (defmacro reset-spell-area() ;;Not needed with GC enabled? (unless (fboundp 'gc:flip) `(si:reset-temporary-area spell-area t))) (defun make-dictionary () "Makes initial blank spell dictionary." (format t "~&Creating new dictionary...~%") ;;; Clear working storage? (reset-spell-area) ;;; Determine array parameters based on # words (setq *hash-size* (dictionary-good-size (ceiling (times 1.25 ;Overflow factor *number-of-words*)))) (setq *dict-size* (* *hash-size* *qs-per-entry*)) (setq *dictionary* (make-array *dict-size* :area spell-area :initial-element nil))) (defun add-dictionary () "Load words from list into dictionary." (format t "~&Loading ~D words into dictionary...~%" *number-of-words*) (setq *rehash-list* nil) (dolist (w *word-list*) (let ((word-number (* *qs-per-entry* (spell-hash (car w))))) (cond ((null (de-word word-number)) (install-word word-number w)) (t (push w *rehash-list*))))) (dolist (w *rehash-list*) (let ((word-number (* *qs-per-entry* (spell-hash (car w)))) hole-number) ;find the last word in the chain (do () ((= (de-next word-number) -1)) ;???Can hit equal word??? (setq word-number (de-next word-number))) ;find a hole (setq hole-number word-number) (do () ((null (de-word hole-number))) (incf hole-number *qs-per-entry*) (if (>= hole-number *dict-size*) (setq hole-number 0))) (install-word hole-number w) (setf (de-next word-number) hole-number) (pop *rehash-list*)))) (defun load-dictionary (&key (query nil)) (cond ((or (not query) (y-or-n-p "Initialize the SPELL-CHECK dictionary? ")) (when (make-word-list) (make-dictionary) (add-dictionary))))) (defun add-to-dictionary(&optional (filename "spell-dictionary.text")) (when filename (make-word-list (pathname filename)) (add-dictionary))) (defun add-auxiliary-dictionary() (with-open-file (probe *dictionary-auxiliary-source-file* :direction :probe) (and probe (y-or-n-p "Load the auxiliary dictionary?") (add-to-dictionary *dictionary-auxiliary-source-file*)))) (defun add-private-dictionary() (let((path (merge-pathnames (fs:user-homedir) *dictionary-source-file*))) (with-open-file (probe path :direction :probe) (and probe (y-or-n-p "Load your private dictionary?") (add-to-dictionary path))))) (add-initialization "Initialize dictionary" '(load-dictionary :query t) '(once)) (add-initialization "Add auxiliary dictionary" '(add-auxiliary-dictionary) '(once)) (add-initialization "Add your private dictionary" '(add-private-dictionary) '(once)) ;;; Use some magic to allow swapping back and forth between ;;; a word to be tested and its variants. (defvar saved-word-array nil) (defun get-word-array (&optional (saved-place 'saved-word-array)) (let ((word (symbol-value saved-place))) (cond ((or (null word) (null (%store-conditional (locf (symbol-value saved-place)) word nil))) (make-array 50. :type :art-string :leader-list '(0) :area spell-area)) (t (store-array-leader 0 word 0) word)))) (defun free-word-array (word &optional (saved-place 'saved-word-array)) (setf (symbol-value saved-place) word)) ;;; Here comes the real work - functions to check a word against the ;;; dictionary. Used all over the place, but also useful in programs. (defun word-in-dictionary-p (s &optional (start 0) (end (string-length s))) "Check a word against the dictionary. Returns it's address if found exactly as input, NIL otherwise." (when (stringp s) ;Sanity check (do ((word-number (* *qs-per-entry* (spell-hash s start end)) (de-next word-number))) ((or (null word-number)(= word-number -1)(null (de-word word-number))) nil) (let ((len (- end start))) (cond ((and (= len (string-length (de-word word-number))) (%string-equal s start (de-word word-number) 0 len)) (return word-number)))) ))) (defun word-ok-p (word &optional (start 0) (end (string-length word)) &aux root) "Check a word against the dictionary. Returns NIL if not found; the word itself if it is in the dictionary; or the root word if that was found by removing suffixes." (if (word-in-dictionary-p word start end) ;;Just return word as is. (return-from word-ok-p (substring word start end)) ;;Else... (let ((word-length (- (or end (string-length word)) start))) (cond ((<= word-length 1) word) ((< word-length 4) nil) ;should look up in private dictionary! (t (let ((new-word (get-word-array))) (copy-array-portion word start end new-word 0 word-length) (store-array-leader word-length new-word 0) (let ((l (aref new-word (1- word-length)))) (setq root (cond ((char-equal #/D l) (spell-d-ending new-word word-length)) ((char-equal #/T l) (spell-t-ending new-word word-length)) ((char-equal #/R l) (spell-r-ending new-word word-length)) ((char-equal #/G l) (spell-g-ending new-word word-length)) ((char-equal #/H l) (spell-h-ending new-word word-length)) ((char-equal #/S l) (spell-s-ending new-word word-length)) ((char-equal #/N l) (spell-n-ending new-word word-length)) ((char-equal #/E l) (spell-e-ending new-word word-length)) ((char-equal #/Y l) (spell-y-ending new-word word-length)) (t nil)))) (free-word-array new-word) (cond ((null root) nil) ;should look up in separate private dictionary! (t root)) )))))) ;;; The following routines try to match a word to a root that's in the ;;; dictionary. Depending on the final character of the original word, the ;;; appropriate routine is called. Then various word endings are checked for ;;; and removed and/or changed. If what's left is a dictionary word, and that ;;; word has the appropriate flags hanging off it, then we have the root ;;; word. ;;; Words ending in G (defun spell-g-ending (word word-length) (block nil ;;word must end in ING (if (not (char-equal (aref word (- word-length 3)) #/I)) (return nil)) (if (not (char-equal (aref word (- word-length 2)) #/N)) (return nil)) ;;we already know the last letter is G ;;try to change the I to E, like CREATING (aset #/E word (- word-length 3)) (store-array-leader (- word-length 2) word 0) (if (< (string-length word) 2) (return nil)) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-g-flag word-number))) (de-word word-number))))) (store-array-leader (- word-length 3) word 0) (if (< (string-length word) 2) (return nil)) ;; this stops CREATEING (if (char-equal (aref word (- word-length 4)) #/E) (return nil)) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-g-flag word-number))) (de-word word-number))))) nil)) ;;; Words ending in D (defun spell-d-ending (word word-length) (block nil ;;word must end in ED (if (not (char-equal (aref word (- word-length 2)) #/E)) (return nil)) ;;we already know the last letter is D ;;kill the D (store-array-leader (- word-length 1) word 0) ;;like CREATED (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-d-flag word-number))) (de-word word-number))))) (if (< (string-length word) 3) (return nil)) ;;kill ED (store-array-leader (- word-length 2) word 0) (cond ((and (char-equal (aref word (- word-length 3)) #/I) (not (vowelp (aref word (- word-length 4))))) (aset #/Y word (- word-length 3)) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-d-flag word-number))) (de-word word-number))))))) (cond ((or (and (not (char-equal (aref word (- word-length 3)) #/E)) (not (char-equal (aref word (- word-length 3)) #/Y))) (and (char-equal (aref word (- word-length 3)) #/Y) (vowelp (aref word (- word-length 4))))) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-d-flag word-number))) (de-word word-number))))))) nil)) ;;; Words ending in T (defun spell-t-ending (word word-length) (block nil ;;word must end in EST (if (not (char-equal (aref word (- word-length 3)) #/E)) (return nil)) (if (not (char-equal (aref word (- word-length 2)) #/S)) (return nil)) ;;we already know the last letter is T ;;cut off ST (store-array-leader (- word-length 2) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-t-flag word-number))) (de-word word-number))))) (if (< (string-length word) 3) (return nil)) ;;cut off EST (store-array-leader (- word-length 3) word 0) (cond ((and (char-equal (aref word (- word-length 4)) #/I) (not (vowelp (aref word (- word-length 5))))) (aset #/Y word (- word-length 4)) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-t-flag word-number))) (de-word word-number))))))) (cond ((or (and (not (char-equal (aref word (- word-length 4)) #/E)) (not (char-equal (aref word (- word-length 4)) #/Y))) (and (char-equal (aref word (- word-length 4)) #/Y) (vowelp (aref word (- word-length 5))))) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-t-flag word-number))) (de-word word-number))))))) nil)) ;;; Words ending in R (defun spell-r-ending (word word-length) (block nil ;;must end in ER (if (not (char-equal (aref word (- word-length 2)) #/E)) (return nil)) ;;we already know the last letter is R; kill it (store-array-leader (- word-length 1) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-r-flag word-number))) (de-word word-number))))) (if (< (string-length word) 3) (return nil)) ;;kill ER (store-array-leader (- word-length 2) word 0) (cond ((and (char-equal (aref word (- word-length 3)) #/I) (not (vowelp (aref word (- word-length 4))))) (aset #/Y word (- word-length 3)) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-r-flag word-number))) (de-word word-number))))))) (cond ((or (and (not (char-equal (aref word (- word-length 3)) #/E)) (not (char-equal (aref word (- word-length 3)) #/Y))) (and (char-equal (aref word (- word-length 3)) #/Y) (vowelp (aref word (- word-length 4))))) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-r-flag word-number))) (de-word word-number))))))) nil)) ;;; Words ending in H (defun spell-h-ending (word word-length) (block nil ;;must end in TH (if (not (char-equal (aref word (- word-length 2)) #/T)) (return nil)) ;;kill TH (store-array-leader (- word-length 2) word 0) (cond ((and (char-equal (aref word (- word-length 4)) #/I) (char-equal (aref word (- word-length 3)) #/E)) (aset #/Y word (- word-length 4)) (store-array-leader (- word-length 3) word 0))) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-h-flag word-number))) (de-word word-number))))) nil)) ;;; Words ending in S -- a real workhorse ; ; Checks for flags: X, J, Z, S, P, M ; ; X -ions or -ications or -ens ; J -ings ; Z -ers or -iers ; S -ies or -es or -s ; P -iness or -ness ; M -'S ; (defun spell-s-ending (word word-length) (block nil (store-array-leader (- word-length 1) word 0) (cond ((or (not (string-search (aref word (- word-length 2)) "SXZHY")) (and (char-equal (aref word (- word-length 2)) #/Y) (vowelp (aref word (- word-length 3))))) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-s-flag word-number))) (de-word word-number))))))) (let ((l (aref word (- word-length 2)))) (cond ((char-equal l #/N) ;for X flag (cond ((and (char-equal (aref word (- word-length 4)) #/I) (char-equal (aref word (- word-length 3)) #/O)) ;;word ended in xxxIONS ... replace with xxxE (aset #/E word (- word-length 4)) (store-array-leader (- word-length 3) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-x-flag word-number))) (de-word word-number))))))) (cond ((and (greaterp word-length 8) (char-equal (aref word (- word-length 8)) #/I) (char-equal (aref word (- word-length 7)) #/C) (char-equal (aref word (- word-length 6)) #/A) (char-equal (aref word (- word-length 5)) #/T) (char-equal (aref word (- word-length 4)) #/E)) (aset #/Y word (- word-length 8)) (store-array-leader (- word-length 7) word 0) (let ((word-number (word-in-dictionary-p word))) (if (and word-number (not (zerop (de-x-flag word-number)))) (return (de-word word-number)) (return nil))))) (cond ((and (char-equal (aref word (- word-length 3)) #/E) (not (char-equal (aref word (- word-length 4)) #/E)) (not (char-equal (aref word (- word-length 4)) #/Y))) (store-array-leader (- word-length 3) word 0) (let ((word-number (word-in-dictionary-p word))) (if (and word-number (not (zerop (de-x-flag word-number)))) (return (de-word word-number)) (return nil))))) (return nil)) ((char-equal l #/G) ;J flag (if (not (char-equal (aref word (- word-length 4)) #/I)) (return nil)) (if (not (char-equal (aref word (- word-length 3)) #/N)) (return nil)) ;;word ended in INGS ... remove INGS, put on E (aset #/E word (- word-length 4)) (store-array-leader (- word-length 3) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-j-flag word-number))) (de-word word-number))))) ;;now remove the E (store-array-leader (- word-length 4) word 0) (if (and (greaterp (string-length word) 5) (char-equal (aref word (- word-length 5)) #/E)) (return nil)) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-j-flag word-number))) (de-word word-number))))) (return nil)) ((char-equal l #/R) ;Z flag ;;must end in ERS (if (not (char-equal (aref word (- word-length 3)) #/E)) (return nil)) ;;remove RS (store-array-leader (- word-length 2) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-z-flag word-number))) (de-word word-number))))) (cond ((char-equal (aref word (- word-length 4)) #/I) ;;word ended in IERS ... remove and add Y (aset #/Y word (- word-length 4)) (store-array-leader (- word-length 3) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-z-flag word-number))) (de-word word-number))))) (return nil))) ;;now chop at E from ...ERS (store-array-leader (- word-length 3) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-z-flag word-number))) (de-word word-number))))) (return nil)) ((char-equal l #/E) ;S flag (except simple adding of S (?) ;;word ends in ES ... prevent "ACEES", "HATEES"! (if (char-equal (aref word (- word-length 3)) #/E) (return nil)) ;;word ends xES where x is consonant or vowel not E... ;;Lets through "ACTES" ! ;;Not to mention "TESTES", which is right for the wrong reason. (store-array-leader (- word-length 2) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-s-flag word-number))) (de-word word-number))))) (cond ((char-equal (aref word (- word-length 3)) #/I) ;;ok, we had IES ... remove, add Y (aset #/Y word (- word-length 3)) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-s-flag word-number))) (de-word word-number))))))) (return nil)) ((char-equal l #/S) ;P flag (if (not (char-equal (aref word (- word-length 4)) #/N)) (return nil)) (if (not (char-equal (aref word (- word-length 3)) #/E)) (return nil)) ;;ok, we had NESS ... kill it (store-array-leader (- word-length 4) word 0) ;;don't check ONESS, etc. (if (lessp (string-length word) 2) (return nil)) (cond ((or (not (char-equal (aref word (- word-length 5)) #/Y)) (vowelp (aref word (- word-length 6)))) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-p-flag word-number))) (de-word word-number))))))) (cond ((char-equal (aref word (- word-length 5)) #/I) (aset #/Y word (- word-length 5)) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-p-flag word-number))) (de-word word-number))))))) (return nil)) ((char-equal l #/') ;M flag, ...'S *** ! (store-array-leader (- word-length 2) word 0) (return word)) (t (return nil)))))) (defun spell-n-ending (word word-length) (block nil (cond ((char-equal (aref word (- word-length 2)) #/E) ;;ended in EN (if (or (char-equal (aref word (- word-length 3)) #/E) (char-equal (aref word (- word-length 3)) #/Y)) (return nil)) (store-array-leader (- word-length 2) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-n-flag word-number))) (de-word word-number))))) (return nil))) (if (not (char-equal (aref word (- word-length 3)) #/I)) (return nil)) (if (not (char-equal (aref word (- word-length 2)) #/O)) (return nil)) ;;word ended in ION, replace with E (aset #/E word (- word-length 3)) (store-array-leader (- word-length 2) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-n-flag word-number))) (de-word word-number))))) (when (greaterp (string-length word) 7) (if (not (char-equal (aref word (- word-length 7)) #/I)) (return nil)) (if (not (char-equal (aref word (- word-length 6)) #/C)) (return nil)) (if (not (char-equal (aref word (- word-length 5)) #/A)) (return nil)) (if (not (char-equal (aref word (- word-length 4)) #/T)) (return nil)) (if (not (char-equal (aref word (- word-length 3)) #/E)) (return nil)) ;;original word would have been ...ICATION (aset #/Y word (- word-length 7)) (store-array-leader (- word-length 6) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-n-flag word-number))) (de-word word-number)))))) (return nil))) (defun spell-e-ending (word word-length) (block nil (if (not (char-equal (aref word (- word-length 3)) #/I)) (return nil)) (if (not (char-equal (aref word (- word-length 2)) #/V)) (return nil)) ;;ended in IVE ... change to E (aset #/E word (- word-length 3)) (store-array-leader (- word-length 2) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-v-flag word-number))) (de-word word-number))))) (if (char-equal (aref word (- word-length 4)) #/E) (return nil)) (store-array-leader (- word-length 3) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-v-flag word-number))) (de-word word-number))))) (return nil))) (defun spell-y-ending (word word-length) (block nil (if (not (char-equal (aref word (- word-length 2)) #/L)) (return nil)) ;ends in LY, remove (store-array-leader (- word-length 2) word 0) (let ((word-number (word-in-dictionary-p word))) (when word-number (return (and (not (zerop (de-y-flag word-number))) (de-word word-number))))) (return nil))) ;;; The main action (defvar *possibilities* nil "Global list of word variants") (defvar *meanings* nil "Global list of word derivations") (defvar saved-word-for-possibilities nil) (defun make-possibilities (word) (setq *possibilities* nil) (setq *meanings* nil) (let ((new-word (get-word-array 'saved-word-for-possibilities))) (*catch 'enough (wrong-letter word new-word) (extra-letter word new-word) (missing-letter word new-word) (transposed-letter word new-word)) (free-word-array new-word 'saved-word-for-possibilities)) (values *possibilities* *meanings*)) ;;; On Lambda we get fancy and notify the WHO line we're ;;; doing spell-checking... ;#+LAMBDA ;(defun spell-word-2(word) ; (tv:with-who-line-run-state ; ("SPELL") ; (make-possibilities word))) ;#-LAMBDA (deff spell-word-2 #'make-possibilities) ;;; Standard spelling checker interface (defun spell-word (word &optional (start 0) (end (string-length word))) "Standard interface to dictionary. Input must be a string. START, END if specified specify the beginning index and length of string to check. The return value can be: The input word: The word was found either in the system dictionary or your private dictionary. Another string: This string, derived by stripping suffixes from the original word, was found in the system dictionary. A list or strings: These strings are words that are /"close/" to the original word. They are already capitalized like the original word. NIL: Neither the original or any near misses could be found." (cond ((word-ok-p word start end)) (t (values (make-possibilities (substring word start end)))))) ;;; Forgiving interface (defun spell-check (word) "Dictionary-based spelling checker. The input word can be anything acceptable to the STRING function, e.g. a character string or an id. Returns up to two values: Word, NIL: The word was found in the dictionary exactly as input. Word, Word: The second word, derived by stripping suffixes from the first word, was found in the dictionary. List, List: The strings in the first list are words that are /"close/" to the original word. They are already capitalized like the original word. The keywords in the second list indicate what permutation was applied to produce the variant. NIL, NIL: Neither the original nor any variants could be found." ;;; Pre-process the input string (setq word (string-trim *ng-characters* word)) ;Cut to order (let ( (word-ok (word-ok-p word 0 (string-length word))) ) (cond ((string-equal word word-ok) (values word nil)) (word-ok (values word (spell-string-copy word word-ok))) (t (spell-word-2 word))))) (deff zwei:spell-word 'spell-word) ;Let Zmacs see std. spell check routine ;;; Utility functions (defun spell-string-copy (original-word new-word) "Copy case structure of original (input) word to variant (output) word. I.e. if original word began with upper-case letter, make sure output word does..." (cond ((< (string-length original-word) 2) (cond ((zerop (string-length original-word)) new-word) ((upper-case-p (aref original-word 0)) (string-upcase new-word)) (t (string-downcase new-word)))) ((upper-case-p (aref original-word 0)) (cond ((upper-case-p (aref original-word 1)) (string-upcase new-word)) (t (let ((copy (string-downcase new-word))) (aset (char-upcase (aref new-word 0)) copy 0) copy)))) (t (string-downcase new-word)))) (defun insert (word meaning) "Put new variant word on global possibilities list, but no duplicates, and no more than *enough-words* of them in all." (unless (member word *possibilities*) (push word *possibilities*) (push meaning *meanings*) (if (>= (length *possibilities*) *enough-words*) (*throw 'enough nil)))) ;;; Functions that generate and check permutations of a word. Note that the ;;; input word must already be in 'word array' format. (defun wrong-letter (word new-word) (store-array-leader 0 new-word 0) (string-nconc new-word word) (dotimes (char-number (string-length word)) (do ((letter (char-int #/A) (1+ letter))) ((> letter (char-int #/Z))) (aset letter new-word char-number) (if (word-ok-p new-word) (insert (spell-string-copy word new-word) :wrong-letter))) (aset (aref word char-number) new-word char-number))) (defun extra-letter (word new-word) (dotimes (char-number (string-length word)) (store-array-leader 0 new-word 0) (do ((from 0 (1+ from))) ((= from (string-length word))) (cond ((not (= from char-number)) (string-nconc new-word (aref word from))))) (if (word-ok-p new-word) (insert (spell-string-copy word new-word) :extra-letter)))) (defun missing-letter (word new-word) (dotimes (char-number (1+ (string-length word))) (store-array-leader 0 new-word 0) (dotimes (x char-number) (string-nconc new-word (aref word x))) (string-nconc new-word 0) (do ((x char-number (1+ x))) ((= x (string-length word))) (string-nconc new-word (aref word x))) (do ((letter (char-int #/A) (1+ letter))) ((> letter (char-int #/Z))) (aset letter new-word char-number) (if (word-ok-p new-word) (insert (spell-string-copy word new-word) :missing-letter))))) (defun transposed-letter (word new-word) (dotimes (char-number (1- (string-length word))) (store-array-leader 0 new-word 0) (string-nconc new-word word) (let ((temp (aref new-word char-number))) (aset (aref new-word (1+ char-number)) new-word char-number) (aset temp new-word (1+ char-number))) (if (word-ok-p new-word) (insert (spell-string-copy word new-word) :transposed-letter)))) ;;; Exercises (defconst test-list '(("CREATE" "CREATIVE") ("PREVENT" "PREVENTIVE") ("CREATE" "CREATION") ("MULTIPLY" "MULTIPLICATION") ("FALL" "FALLEN") ("CREATE" "CREATIONS") ("MULTIPLY" "MULTIPLICATIONS") ("WEAKEN" "WEAKENS") ("TWENTY" "TWENTIETH") ("HUNDRED" "HUNDREDTH") ("QUICK" "QUICKLY") ("FILE" "FILING") ("CROSS" "CROSSING") ("FILE" "FILINGS") ("CROSS" "CROSSINGS") ("CREATE" "CREATED") ("IMPLY" "IMPLIED") ("CROSS" "CROSSED") ("CONVEY" "CONVEYED") ("LATE" "LATEST") ("DIRTY" "DIRTIEST") ("SMALL" "SMALLEST") ("GRAY" "GRAYEST") ("SKATE" "SKATER") ("MULTIPLY" "MULTIPLIER") ("BUILD" "BUILDER") ("CONVEY" "CONVEYER") ("SKATE" "SKATERS") ("MULTIPLY" "MULTIPLIERS") ("BUILD" "BUILDERS") ("SLAY" "SLAYERS") ("IMPLY" "IMPLIES") ("FIX" "FIXES") ("BAT" "BATS") ("CONVEY" "CONVEYS") ("CLOUDY" "CLOUDINESS") ("LATE" "LATENESS") ("GRAY" "GRAYNESS") ("DOG" "DOG'S") )) (defun run-tests () (dolist (x test-list) (cond ((not (string-equal (car x) (word-ok-p (cadr x)))) (format t "~&failed match ~s ~s" x (word-ok-p (cadr x)))) (t (format t "~&ok match ~s" (cadr x)))))) (defun look-up-all-words (&optional (source-file *dictionary-source-file*)) (with-open-file (in source-file) (do ((word (send in :line-in) (send in :line-in))) ((or (null word) (zerop (string-length word)))) (cond ((= #/; (aref word 0))) ((word-in-dictionary-p (substring word 0 (string-search #// word)))) ('else (ferror nil "can't find ~s" word)))))) (defun decode-word (word-number) (cond ((null word-number) nil) ((consp word-number) word-number) ((numberp word-number) (format t "~&~D ~20a " word-number (de-word word-number)) (format t "~a " (de-next word-number)) (decode-flags (de-flags word-number))) ((stringp word-number) (multiple-value-bind (word realword) (spell-check word-number) (cond ((null word) nil) ((null realword) (decode-word (word-in-dictionary-p word))) ((stringp realword) (decode-word realword)) (t word)))) (t (decode-word (string word-number))))) (defun decode-flags (flags) (when flags (dotimes (i (length *all-flags*)) (if (ldb-test (byte 1 i) flags) (format t "~a " (nth i *all-flags*)))) (if (ldb-test (byte 1 (length *all-flags*)) flags) (format t "USED ")))) (defun word-flags(word &aux word-number) (when (setq word-number (word-in-dictionary-p word)) (let ((flags (de-flags word-number))) (when (and (numberp flags) (greaterp flags 0)) (let ((flag-list nil)) (dotimes (flag-number (length *all-flags*) (reverse flag-list)) (if (ldb-test (byte 1 flag-number) flags) (push (nth flag-number *all-flags*) flag-list))) ))))) (defun print-dictionary () (let ( (n (1- (array-dimension *dictionary* 0))) ) (loop for word-number from 0 to n by 3 do (format t "~&~20a " (de-word word-number)) (format t "~a " (de-next word-number)) (decode-flags (de-flags word-number))))) (defun describe-dictionary () (let ( (n (1- (array-dimension *dictionary* 0))) (total-words 0) (total-word-size 0) (total-holes 0) ) (loop for entry from 0 to n by 3 with thisword = nil when (setq thisword (de-word entry)) do (incf total-words) (incf total-word-size (string-length thisword)) else do (incf total-holes)) (format t "~&~S has ~D words total, ~D empty slots, ~D bytes, ~D bytes per word" *dictionary* total-words total-holes total-word-size (if (greaterp total-words 0) (// total-word-size total-words) 0)) (values total-words total-holes total-word-size))) ;;; Allow the user to register complaints about good words that don't take... ;;; and bad words that do. ;;; Shortand for formatted i/o on the right stream with fresh line: (defmacro tell-user (&optional format-string &rest args) `(format *query-io* ,(string-append "~&" (or format-string "")) ,@args)) (defun complain(&optional word problem complaint control) ;;problem is one of: ;; :bug :bad-word :good-word ;; :bad-answer :better-answer ;; :other :no-problem (unless word (tell-user "Tell me the exact word you disagree about: ") (setq word (string-trim *ng-characters* (readline)))) (unless control (tell-user "Just a moment to recheck that...") (setq control (spell-check word))) (cond ;;Got a single, 'correct' answer -- it's either -- ((stringp control) (tell-user "looks valid!") (cond ((fquery nil "Do you think '~a' is misspelled? " word) (tell-user "Thanks for the advice.") (setq problem :bad-word) ;A bad word we think's good, or (setq complaint (format nil "~a is invalid" word))) (t (setq problem :no-problem) ;A good word - no complaint. (tell-user "Then we agree. Bye.") (return-from complain)))) ;; Ask user if he is complaining about a good word we consider bad ((fquery nil "OK - Do you think ~a's correct? " word) (tell-user "Thanks for the advice.") (setq problem :good-word) (setq complaint (format nil "~a should be OK" word))) ;; If no, user agree's it's misspelled as given. If that's what the ;; spelling checker told him, then punt. ((null control) (setq problem :no-problem) (tell-user "I don't recognize '~a' either." word) (tell-user "Fine, then we agree about something!") (return-from complain)) ;; Must have gotten a list of variants. ((consp control) (tell-user "Even though ~a looks wrong to me,~%" word) (tell-user "I find the alternative~P: ~{~s ~}~%" (length control) control) (cond ;; Maybe he's complaining about one of our menu suggestions... ((fquery nil "Did I suggest a choice that you don't like? ") (setq problem :bad-answer) (setq complaint (string-append (format nil "~a - objects to alternative " word) (if (cdr control) (prompt-and-read :read "~&Which one? ") (car control)) " because " (prompt-and-read :string-trim "...why? ")))) ;; No? Then ask him for the one he's dying to give us. Everybody's a ;; critic... (t (setq problem :better-answer) (tell-user "Then I have to ask you to give me a better suggestion.") (tell-user "What's your preference? ") (setq complaint (format nil "~a choices OK but could use ~s" word (read))))) ) (t (setq problem :bug) (setq complaint (format nil "~a returns ~s ???" word control))) ) (log-complaint complaint) complaint) (defun log-complaint(complaint &optional (log-file *complaint-log-file*)) (with-open-file (s (fs:merge-pathname-defaults log-file :type "TEXT") :direction :output :if-exists :append :if-does-not-exist :create) (format s ">> ~a~%" (string-upcase complaint)))) ;; Routine to increment a number stored as target in an a-list based on ;; the input tag (defmacro isoc(k l) `(incf (cdr (or (assoc ,k ,l) (car (push (cons ,k 0) ,l))) ))) ;;; Do a spelling check on an input file. (defun check-words(input &key (pred '(lambda(x) (not(stringp(word-ok-p x))))) (counts nil) (verbose nil) &aux found (count 0) temp eof) (with-open-file (in (fs:merge-pathname-defaults input :type "TEXT")) (loop while (not eof) do (multiple-value-setq (temp eof) (read-delimited-string *ng-characters* in nil)) (when (> (string-length temp) 0) (when (funcall pred temp) (when verbose (print temp)) (if counts (isoc temp found) (unless (member temp found) (pushnew temp found))) (incf count))) finally (return (values found count)))))