#| -*- Mode:LISP; Package:(MARCUS global); Fonts:(cptfontb); Base:10 -*- This program is a deterministic parser, which uses limited look-ahead to determine the parse. Input: A list containing the words of the sentence to be parsed. (This parser does not handle, or expect, punctuation, with the exception of an optional '.' at end of sentence.) Oputput: A tree diagram of the parse, showing all the elements parsed from sentence to the lexical entries. Example: (marcus '(kim gave robin the book)) ==> (s (np (propn (lex kim))) (vp (v (lex gave)) (np (art (lex the)) (noun (lex book))) (pp (prep (lex to)) (np (propn (lex robin)))))) |# ;;; ;;; friendly utility functions ;;; (DEFMACRO WHILE (PRED &REST BODY) "(WHILE pred { form1 form2 ... } ) => (DO ((NOT pred)) (PROGN form1 form2 ...)) WHILE loops repeatedly, evaluating the predicate 'pred' -- if the result is non-NIL, the body 'forms' are executed, and then back to the predicate, and so on. Normally the value returned is the last 'form' in the body. Because WHILE is expanded into an equivalent DO form, (RETURN values...) may be used to force an exit and return values from the loop." `(DO ($WHILE$) ((NOT ,PRED) $WHILE$) (SETQ $WHILE$ (PROGN . ,BODY)))) (DEFUN INSERT (L-OBJ L-LIST FUNC &OPTIONAL DUPS-P) "(INSERT l-obj l-list func dups-p) Insert 'l-obj' destructively into 'l-list' at the place determined by 'func'. The function-spec 'func' should expect 2 arguments, the 'l-obj' and an element of 'l-list'. 'func' is called recursively on 'l-obj' and the head of 'l-list'; if and when 'func' returns a non-NIL value, 'l-obj' is spliced into the list ahead of the current element of 'l-list'. For example: (setq foo '(1 2 5 8 10)) => (1 2 5 8 10) (insert 6 foo 'lessp) => (6 8 10) foo => (1 2 5 6 8 10) The flag 'dups-p' can be included as the last argument to indicate that if a duplicate value is encountered before 'l-obj' is spliced in, don't insert the duplicate. For example: (insert 5 foo 'lessp) => (5 6 8 10) foo => (1 2 5 5 6 8 10) (insert 5 foo 'lessp t) => NIL foo => (1 2 5 5 6 8 1))" (COND ((NULL L-LIST) NIL) ((AND DUPS-P (EQUAL L-OBJ (CAR L-LIST))) NIL) ((FUNCALL FUNC L-OBJ (CAR L-LIST)) (PSL:PUSH-IP L-OBJ L-LIST)) (T (INSERT L-OBJ (CDR L-LIST) FUNC DUPS-P)))) ;;; ;;; global declarations ;;; (defvar *inputstring* nil "List of ids --> sentence to be parsed") (defvar *buffer* nil "Stack/list --> parsing items") (defvar *bufferend* 0 "Buffer pointer") (defvar *stack* nil "Stack/list --> nodes") (defconstant *defaultpriority* 10 "???") (defvar *topcat* 's) (defvar *windowsize* 3) (defvar p 0 "tab position") (defvar *rules* '((subj sstart 10 ((1 np)) (attach 1 -1) (deactivate sstart) (newnode 'vp) (activate parsevp)) (npn npack 10 ((1 n)) (attach 1 -1) (drop) (restore)) (snp cpool 0 (! (1 art)) (newnode 'np) (attach 1 -1) (activate npack)) (pnp cpool 0 (! (1 propn)) (newnode 'np) (attach 1 -1) (drop) (restore)) (pp cpool 1 (! (1 prep) (2 np)) (newnode 'np) (attach 1 -1) (attach 1 -1) (drop) (restore)) (obj parsevp 10 ((1 np)) (attach 1 -1) (attach -1 -2)) (vb parsevp 10 ((1 v) (2 np)) (attach 1 -1)) (pmod parsevp 10 ((1 pp)) (attach 1 -1)) (!obj parsevp 5 ((1 np) (2 np)) (attach 2 -1) (attach -1 -2) (insertat 'to 1))) "Grammar rules") (defvar dict '((tb (prep)) (book (n)) (gives (v (/#objs 2) (pn s3) (lex give))) (runs (v (pn s3) (/#objs 0) (lex run))) (like (v (/#objs 1))) (and (conj)) (that (comp)) (with (prep)) (mary (propn)) (scope (n)) (a (art (pn s3))) (park (n)) (in (prep)) (/'s (pos)) (duck (n)) (saw (v (tns pst)(/#objs 1)(lex see))) (likes (v (pn s3) (/#objs 1)(lex like))) (men (n (pn p1)(lex man))) (man (n)) (the (art (pn s3 p1))) (/. (punct (type final))) (john (propn))) "Words in the dictionary") (defvar redun '((n (pn s3)) (propn (pn s3)) (v (tns (pres) /#objs 0 1) (pn sg p1))) "Redundancy table") (defvar *feats* nil) (defvar *activepackets* nil "List of active grammar packets") (defvar *alwayspackets* '(cpool) "List of always active grammar packets") (defvar *startpackets* '(sstart cpool) "Packets to make active at initialization") (defvar *packetlibrary* nil "List of known packets?") (defvar *offset* 0) (defvar *offsetstack* nil) (defvar *cfig* nil "?????") ;;; ;;; utility functions ;;; (defmacro incr(id) (if (and id (symbolp id)) `(setq ,id (add1 ,id)))) (defmacro decr(id) (if (and id (symbolp id)) `(setq ,id (sub1 ,id)))) ;;; (defun createnode (cat) (list 'node cat (intern(gensym 's)) nil)) (defun newnode (cat) (pushstack (createnode cat))) (defun getsubnodes(node) (cddddr node)) (defun addtosubnodes(node newsub) (rplacd (cdddr node) (cons newsub (getsubnodes node)))) (defun setsuper (node newsuper) (putprop (caddr node) newsuper 'super)) (defun getsuper (node) (get (caddr node) 'super)) (defun getcat (node) (cadr node)) ;;; ;;; data-type predicates ;;; (defun nodep (ptr) (and (listp ptr) (eq (car ptr) 'node))) (defun lexp (ptr) (and (listp ptr) (eq (car ptr) 'lex))) ;;; ;;; actions on packets from nodes ;;; (defun getpackets (node) (cadddr node)) (defun setpackets (node packets) (rplaca (cdddr node) packets)) ;;; ;;; actions on rules ;;; (defun getpriority (rule) (caddr rule)) (defun gettests (rule) (cadddr rule)) (defun getactions (rule) (cddddr rule)) (defun getpacket (rule) (cadr rule)) ;;; (defun drop() (insert1 (popstack) 1)) (defun insertat (word place) (insert1 (makeword word) place)) (defun activate ("e &rest packets) (setq packets (copy-list packets)) (mapc #'(lambda (p) (cond ((not (assoc p *activepackets*)) (setq *activepackets* (cons (or (assoc p *packetlibrary*) (error "not a packet" p)) *activepackets*))))) packets)) (defun deactivate ("e &rest packets) (setq packets (copy-list packets)) (mapc #'(lambda (p) (setq *activepackets* (delq (assoc p *activepackets*) *activepackets*))) packets)) (defun attach (subplace superplace) (let( (subber (item subplace)) (supper (item superplace)) ) (addtosubnodes subber supper) (cond ((and (numberp subplace) (greaterp subplace 0)) (rembuf subplace))) (cond ((nodep subber) (setsuper subber supper) (cond (*cfig* (pushstack subber)))) (*cfig* (error "can't make non-node current" subber))))) (defun close1 () (cond ((getsuper (car *stack*)) (popstack)) (t (error "can't close unattached node" (car *stack*))))) (defmacro restore() (popoffset)) ;;; (defun item (address) " Input: The address of the item being accessed Value: A node from the stack or an item from the buffer. " (prog (ref) (cond ((numberp address) (cond ((lessp address 0) (return (or (nth (sub1 (minus address)) *stack*) (error "address not valid")))) ((greaterp address 0) (setq ref (plus address *offset*)) (fillbuffer ref) (return (nth (sub1 ref) *buffer*))))) ((symbolp address) (return (or (prog (node) (setq node (car *stack*)) lp (cond ((and node (nodep node)) (cond ((eq address (getcat node)) (return node)) (t (setq node (getsuper node)) (go lp)))))) (error "not found on stack" address))))) (error "can't interpret address" address))) (defun offset (j) (setq *offsetstack* (cons *offset* *offsetstack*)) (setq *offset* (plus *offset* j))) (defmacro popoffset () '(setq *offset* (pop *offsetstack*))) (defun input () (setq *buffer* (nconc *buffer* (list (makeword (or (pop *inputstring*) '/.))))) (incr *bufferend*)) (defun insert1 (item position) (let( (afterpos (plus position *offset* -1)) tail ) (fillbuffer afterpos) (if (eq afterpos 0) (push item *buffer*) (setq tail (nthcdr (sub1 afterpos) *buffer*)) (rplacd tail (cons item (cdr tail)))) (incr *bufferend*))) (defun fillbuffer (i) (prog () lp (cond ((greaterp i *bufferend*) (input) (go lp))))) (defun popstack () (prog1 (car *stack*) (setq *stack* (cdr *stack*)) (setq *activepackets* nil) (apply 'activate (getpackets (car *stack*))))) (defun rembuf (position) (let( (afterpos (plus position *offset* -1)) tail ) (cond ((>= afterpos *bufferend*) nil) ((zerop afterpos) (pop *buffer*)) (t (setq tail (nthcdr (sub1 afterpos) *buffer*)) (rplacd tail (cddr tail)))) (decr *bufferend*))) (defun activepacketnames () (mapcar 'car *activepackets*)) (defun pushstack (node) (if *stack* (setpackets (car *stack*) (activepacketnames))) (setq *stack* (cons node *stack*)) (setq *activepackets* nil) (apply 'activate *alwayspackets*)) (defun makeword (word) (let( (lv (defns word)) ) (if (cdr lv) (error "can't handle multiple defn's for word" (cons word lv)) (cons 'lex (car lv))))) ;;; ;;; main routines ;;; (defun init-globals() (setq *offset* 0) (setq *bufferend* 0) (setq *buffer* (setq *stack* (setq *activepackets* (setq *offsetstack* nil))))) (defun marcus (inputstring) (setq *inputstring* inputstring) (init-globals) (newnode *topcat*) (apply 'activate *startpackets*) (while (dparse)) (showresults *stack* *buffer*)) (defun dparse () (prog (ruleset rs top best this rule) (setq top nil) (setq ruleset (mapcar #'cdr *activepackets*)) lp (setq rs ruleset) (while rs (cond ((car rs) (setq this (or (getpriority (caar rs)) *defaultpriority*)) (cond ((or (null top)(lessp this top)) (setq top this) (setq best rs))))) (setq rs (cdr rs))) (cond (top (setq rule (caar best)) (rplaca best (cdar best)) (when (dotests (gettests rule)) (mapc #'eval (getactions rule)) (return t))) (t (return))) (setq top nil) (go lp))) (defun dotests(tests) (cond ((neq (car tests) '/!) (prog (test) lp (cond (tests (setq test (car tests)) (cond ((dotest (car test)(cdr test)) (setq tests (cdr tests)) (go lp)) (t (return)))) (t (return t))))) (t (prog (i test tp) (setq i 0) lp (cond ((greaterp 1 (difference *windowsize* 1))(return))) (setq tp (cdr tests)) lp1 (cond (tp (setq test (car tp)) (cond ((dotest (cond ((and (numberp (car test)) (greaterp (car test) 0)) (plus i (car test))) (t (car test))) (cdr test)) (setq tp (cdr tp)) (go lp1)) (t (incr i) (go lp)))) (t (offset i)(return t))))))) (defun dotest (address conditions) (prog (iitem) (setq iitem (item address)) lp (cond (conditions (cond ((eq (car conditions) (cond ((nodep iitem) (getcat iitem)) ((lexp iitem) (car (cdr iitem))) (t nil))) (setq conditions (cdr conditions)) (go lp)) (t (return)))) (t (return address))))) (defun showresults (stack buffer) (cond ((or (prog (n) lp (cond (buffer (setq n (car buffer)) (cond ((and (lexp n) (eq (car (cdr n)) 'punct)) (setq buffer (cdr buffer)) (go lp)) (t (return t)))) (t (return)))) *inputstring*) (terpri) (princ "No Parse") (terpri))) (cond ((prog (s) (setq s stack) lp (cond ((cdr s) (cond ((eq (getsuper (car s)) (cadr s)) (setq s (cdr s)) (go lp)) (t (return t)))) (t (return)))) (terpri) (princ "Stack not fully connected") (terpri))) (printnode (car (last stack))) (terpri)) (defun tab (pos) (terpri) (while (greaterp pos 0) (princ " ") (decr pos))) (defun printnode (node) (setq p 0) (cond ((lexp node) (printlex (cdr node))) ((nodep node) (prog (pos) (print (getcat node)) (princ " ") (setq p (plus 2 p)) (setq pos p) (map #'(lambda (np) (printnode (car np)) (cond ((cdr np)(tab pos)))) (reverse (getsubnodes node))))))) (defun printlex (lab) (print (car lab)) (princ " ") (setq p (plus 2 p)) (map #'(lambda (f) (printfp (car f)) (cond ((cdr f) (princ " ") (setq p (plus 2 p))))) (cdr lab))) (defun printfp (fp) (princ "[") (princ (car fp)) (setq p (plus 2 p)) (mapc #'(lambda (v) (princ " ") (print v) (setq p (plus p 2))) (cdr fp)) (princ "]")) (defun run-marcus () (packpacks *rules*) (prog (inp previousstring) lp (terpri) (princ "Parse (as ") (princ *topcat*) (princ "): ") (marcus (cond ((eq (setq inp (read)) 'p) (print previousstring) (terpri) previousstring) ((listp inp) (setq previousstring inp)) (t (return)))) (go lp))) (defun packpacks (rules) (setq *packetlibrary* nil) (mapc #'stuffrule rules)) (defun stuffrule (r) (prog (p unknown) (cond ((not (setq p (assoc (getpacket r) *packetlibrary*))) (setq *packetlibrary* (cons (setq p (list (getpacket r))) *packetlibrary*)))) (rplacd p (insert r (cdr p) #'fun6 unknown)))) (defun fun6 (x y) (<= (or (getpriority x) *defaultpriority*) (or (getpriority y) *defaultpriority*))) ;;; ;;; Dictionary functions ;;; (defun defns (word) (cond ((lookup word)) (t (princ "Enter definition for") (princ word) (terpri) (enter word (read)) (defns word)))) (defvar word nil) (defun lookup (word) (mapcar #'lexaug (cdr (assoc word dict)))) (defun enter (word defs) (push (cons word (cond ((or (symbolp defs) (eq (car defs) 'e)) (err 'abort)) ((atom (car defs)) (list defs)) (t defs))) dict)) (defun checkred (fp) (cond ((null (assoc (car fp) *feats*)) (setq *feats* (cons fp *feats*))))) (defun lexaug (def) (prog (cat) (setq *feats* (cdr def)) (mapc 'checkred (cdr (assoc (setq cat (car def)) redun))) (cond ((null (assoc 'lex *feats*)) (setq *feats* (append *feats* (list (list 'lex word)))))))) ;;; At initialization (packpacks *rules*)