#| -*- Mode:LISP; Package:USER; Fonts:(cptfontb); Base:10 -*- Main engine PARSE accepts a rule-name (any of the keys in *rules*) and parses the input list of words accordingly. This is brute-force, feet-first parsing -- there is no back-tracking at all. For example, the first successful parse in an :EITHER clause will ensure that the rest of the parse is continued, but any remaining (possibly ok) :EITHER branches will not be followed if the parse fails later. The terminal parts-of-speech are (for now) table-driven. Each part is associated with an input list of words. A rule can refer to any number of terminals and/or other rules. Each entry in the *rules* table is compiled into a lexical closure which will parse a sentence/fragment according to the rule syntax. At this point, this syntax is very simple, and the closures are not as complex as they should be for this method to pay off big. The *rules* table is an association-list. For each rule-name, the entry consists of a keyword followed by parts -- terminals or rules. The keywords are: :SEQ -- consume a simple fixed sequence of parts :ANY -- consume any number of indicated parts :EITHER -- consume according to the first matching parts At this point, these keywords may not be combined in a single rule. Multiple definitions would be needed to get the desired effect of (:SEQ (:EITHER xxx yyy zzz) aaa bbb) The :SEQ clause has one sub-clause, enclosed in a list: :OPTIONAL -- consume a sequence if present The :OPTIONAL clause allows (:SEQ aaa (:OPTIONAL xxx ) bbb) to match either '(aaa xxx bbb) or '(aaa bbb). For most of the parsing routines, two values are returned: first, the fragment (if any) that was parsed successfully; second, the fragment (if any) that remained from the input. Main routines of interest are: PARSE -- parse an input list by rule, which defaults to 'SENTENCE. CONSUME-xxx (...-WORD, ...-RULE, ...-SEQ, ...-EITHER, ...-ANY) -- for parsing fragments by a given parsing form. ADD-WORD-IS-PART -- Add a word as a given part of speech. |# ;;; Global variables (defconst *verbose* nil "Verbose trace switch") (defconst $part2words '((det . (the a some this that) ) ;determiners (noun . (lambda processor dog cat message error ) ) ;nouns (pnoun . (keith marianne LMI lambda zetalisp unix) ) ;pronouns (verb . (crash burn run die work eat) ) ;verbs (adverb . (good bad slow well poor) ) ;adverbs (adject . (good bad big yellow) ) ;adjectives (aux-verb . (can will does)) ;auxiliary verbs (negative . (not)) ;negative emphasis (positive . (indeed)) ;positive emphasis (possn . (my his her your their our) ) ;possive pronouns (punct . (/. /; /, /! /?) )) ;punctuation chars "Lookup from part-of-speech to possible words") (defconst *terminals* (mapcar #'car $part2words) "Terminal parts of speech") (defconst *rules* '( (simple . (:seq noun verb)) (noun-group . (:any noun pnoun)) (simple-np . (:either noun pnoun)) (adjects . (:any adject)) (np-mod . (:either det possn)) (full-np . (:seq (:optional np-mod) (:optional adjects) simple-np)) (noun-p . (:either simple-np full-np)) (emphasis . (:either negative positive)) (verb-obj . (:seq noun-p (:optional adverb))) (verb-mod . (:either adverb verb-obj)) (verb-mod-p . (:seq verb (:optional verb-mod))) (verb-aux . (:seq aux-verb (:optional emphasis) verb)) (verb-p . (:either verb-mod-p verb-aux)) (statement . (:seq noun-p verb-p (:optional punct))) (question . (:seq aux-verb noun-p verb-mod-p (:optional punct))) (sentence . (:either question statement)) ) "Major parsing rules") (defconst $part2parse nil "Lookup from part-of-speech to parsing code") (defconst $rule2parse nil "Lookup from rule-name to parsing code") ;;; Utility functions ;;; Predicates: (defun terminal-p (term) (member term *terminals*)) (defun rule-p (rule) (assoc rule *rules*)) (defun only-terminals-left (terms) (null(remove-if #'terminal-p terms))) ;;; Manage a-lists: (defun ptree(x y) (list(list x y))) (defmacro push-a-val(key val alist) ;; Push a value on a-list by key `(push ,val (cdr (assoc ,key ,alist)))) (defun add-word-is-part(part word) (push-a-val part word $part2words)) (defmacro poke-a-list(alist key vals) ;; Initialize a-list entry `(setf (cdr(assoc ,key ,alist)) ,vals)) (defun set-words-are-part(part words) (poke-a-list $part2words part words)) ;;; Funcalls on lists of predicates (defun funcall-or (preds thing) ;; Call a list of predicates, OR'ing along (when (consp preds) (or (funcall (car preds) thing) (funcall-or (cdr preds) thing)))) (defun funcall-and(ps x) ;; Call a list of predicates, AND'ing along (dolist (p ps t) (unless (funcall p x) (return-from funcall-and nil)))) (defmacro verbosely (&rest options) "Print the parser trace, if *verbose* is non-NIL." `(if *verbose* (format t ,@options))) ;;; Define verification tests (defconst *tests* '(((lambda die) . simple) ((the lambda) . noun-p) ((eat slow) . verb-mod-p) ((crash lambda slow) . verb-p) ((my yellow dog eat lambda) . statement) ((will dog die /?) . sentence) ((the lambda will not crash) . sentence) )) (defun run-tests() (dolist (test *tests*) (format t "~2&+++Test parsing ~s with ~s~&" (car test) (cdr test)) (format t "~%---Result is:~%") (pprint (cond ((atom(cdr test)) (consume-rule (car test) (cdr test))) (t (consume-seq (car test) (cdr test))) )))) ;;; Constructing closures -- pieces of parsing intelligence (defun simple-lookup-closure(key alist) ;; Return a closure that tests whether a value is associated ;; with the input keyword in the input a-list (function(lambda(x) (member x (cdr (assoc key alist)))))) (setq $part2parse ;;; For each terminal, returns a predicate that returns whether ;;; the input word can be that type `(,@(loop for term in *terminals* collect (cons term (simple-lookup-closure term $part2words))))) (defun get-part-ok-p(part) (or (cdr(assoc part $part2parse)) #'(lambda(x)(ferror nil "No rule for parsing ~s with terminal ~s" x part)) )) (defun find-parsers(parse-options &aux this-parse) (cond ((null parse-options) nil) (t (setq this-parse (car parse-options)) (cons (or (get-rule-ok-p this-parse) (get-part-ok-p this-parse)) (find-parsers (cdr parse-options)))))) (defun consume-word (sequence to-parse &key (mandatory t) &aux (word (car sequence)) ) (let*( (parse-ok-p (get-part-ok-p to-parse)) ) (verbosely "Consume-Word ~s by ~s~&" word to-parse) (cond ((null (closurep parse-ok-p)) (format t "???Expected function, found ~s parsing for ~s.~&" parse-ok-p to-parse) nil) (t (cond ((funcall parse-ok-p word) (verbosely "Parse ok ~s = ~s.~&" word to-parse) (values (ptree to-parse word) (cdr sequence))) (t (when mandatory (verbosely "Parse failed found ~s expected ~s.~&" word to-parse)) (values nil sequence)) ))) )) (defun consume-rule (fragment to-parse) (let*( (parse-ok-p (get-rule-ok-p to-parse)) ) (verbosely "Consume-Rule ~s by ~s~&" fragment to-parse) (cond ((null (closurep parse-ok-p)) (format t "???Expected function, found ~s parsing for ~s.~&" parse-ok-p to-parse) nil) (t (funcall parse-ok-p fragment)) ))) (defun consume-any(sequence to-parse &optional parsed (preds (mapcar #'get-part-ok-p to-parse))) ;; Consume any number of words in ;; matching any of the terminals in . ;; Returns 2 values - parsed, and non-parsed sequences (cond ((null sequence) (values (ptree to-parse parsed) nil)) ;All matched (t (verbosely "Consume-any ~s by any of ~s~&" sequence (or to-parse preds)) (cond ((funcall-or preds (car sequence)) ;Check term (consume-any (cdr sequence) nil (append parsed (list (car sequence))) preds)) (t (values (ptree to-parse parsed) sequence)) ;No more matching )))) (defun consume-either(sequence to-parse &optional (preds (find-parsers to-parse)) &aux (this-parse (car to-parse)) (this-pred (car preds)) ) ;; Consume a fragment in ;; matching any of the terminals or rules in . ;; Returns 2 values - parsed, and non-parsed sequences. (verbosely "Consume-either ~s by any of ~s~&" sequence to-parse) (cond ((null this-parse) (values nil sequence)) ;Ran out of options ((null (closurep this-pred)) (ferror nil "Expecting parser, found ~s in Consume-Either" this-parse)) ((and (terminal-p this-parse) (funcall this-pred (car sequence))) (values (ptree this-parse (car sequence)) (cdr sequence))) (t (multiple-value-bind (parsed left) (funcall this-pred sequence) (if parsed (values (ptree this-parse parsed) left) (consume-either sequence (cdr to-parse) (cdr preds))))) )) (defun consume-terms (sequence to-parse &optional parsed (orig sequence)) ;; Parse the given sequence for terminals. ;; Returns up to 2 values: the fragments parsed and non-parsed. (verbosely "Consume-Terms ~s by ~s~&" sequence to-parse) (cond ((null to-parse) (values (ptree '? parsed) sequence)) ((null sequence) (values nil orig)) (t (cond ((consume-word sequence (car to-parse)) (consume-terms (cdr sequence) (cdr to-parse) (append parsed (list (car sequence))) orig)) (t (values nil orig)))))) (defun consume-seq (sequence to-parse &optional parsed (orig sequence) &aux (this-parse (car to-parse))) ;; Parse the given sequence for terminals and/or rules. ;; Returns 2 values: parsed and unparsed portions. ;; (NIL, sequence if all terms were consumed). (verbosely "Consume-Seq ~s by ~s already got ~s from ~s~&" sequence to-parse parsed orig) (cond ((null to-parse) (values (ptree '? parsed) sequence)) ;Nothing left ((listp this-parse) (let ( (keyword (car this-parse)) ) (when (equal keyword :optional) (multiple-value-bind (optionally-parsed leftover) (consume-seq sequence (cdr this-parse)) (cond ((null optionally-parsed) (consume-seq sequence (cdr to-parse) parsed orig)) ((null (cdr to-parse)) (values (append parsed (ptree (cdr this-parse) optionally-parsed)) leftover)) ((null leftover) (values nil orig)) (t (consume-seq leftover (cdr to-parse) (append parsed optionally-parsed) orig))))))) ((null sequence) (values nil orig)) ;Ran out of sentence ((atom this-parse) (multiple-value-bind (now-parsed leftover) (cond ((terminal-p this-parse) ;Got a terminal (consume-word sequence this-parse)) ((rule-p this-parse) (consume-rule sequence this-parse)) (t (ferror nil "Cannot parse sequence by ~s" to-parse))) (cond ((null now-parsed) (values nil orig)) ((null (cdr to-parse)) (values (ptree this-parse (append parsed now-parsed)) leftover)) (t (consume-seq leftover (cdr to-parse) (append parsed now-parsed) orig))))) )) ;;; Create top-level parse structure -- closures that correspond to ;;; the rules. (defun xlate-rule(rule) (let ( (keyword (car rule)) (tform (cdr rule)) ) (case keyword (:seq (if (only-terminals-left tform) #'(lambda(frag) (consume-terms frag tform)) #'(lambda(frag) (consume-seq frag tform)))) (:any #'(lambda(frag) (consume-any frag tform))) (:either #'(lambda(frag) (consume-either frag tform))) (t (cerror "Create an error-warning rule" "Can't create a parsing function for ~s" rule) #'(lambda(frag) (ferror nil "No way to parse ~s by ~s" frag rule))) ))) (setq $rule2parse ;; Create an a-list containing, for each rule, a closure that parses it `(,@(loop for rule in *rules* collect (cons (car rule) (xlate-rule (cdr rule)))))) (defun get-rule-ok-p (rule) (cdr (assoc rule $rule2parse))) (defun parse(fragment &optional (to-parse 'sentence)) (let ( (rule (get-rule-ok-p to-parse)) ) (if rule (funcall rule fragment) (ferror nil "~s is not a rule." to-parse))))