;;;-*- Mode:LISP; Package:CGOL; Base:10; Readtable:ZL -*- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; ;;; Based on a theory of parsing presented in: ;;; ;;; ;;; ;;; Pratt, Vaughan R., ``Top Down Operator Precedence,'' ;;; ;;; ACM Symposium on Principles of Programming Languages ;;; ;;; Boston, MA; October, 1973. ;;; ;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; This is the tokenizer and system interface for the CGOL parser. ;;; All hairy conditional code is also in the file. The parser itself ;;; is all in CGOL and has no conditionalizations so that it may be ;;; cross-parsed more easily. ;; the idea here is to have a readtable such that every single ;; character causes a CGOLREAD to be invoked. (DEFVAR *CGOL-INVOKING-READTABLE* (COPY-READTABLE) "Readtable that always invokes CGOL parsing") (DEFVAR *CREAD-READTABLE* (COPY-READTABLE) "lisp syntax readtable that CGOL can escape back to") (SET-DISPATCH-MACRO-CHARACTER #\# #\ 'CGOL#-READMACRO *CREAD-READTABLE*) (DOTIMES (CHAR #o200) (COND ((NOT (OR (= CHAR #\SPACE) (= CHAR #\RETURN) (= CHAR #\TAB) (= CHAR #\FORM))) (SET-MACRO-CHARACTER (CODE-CHAR CHAR) 'CGOL-INVOKING-READ-MACRO NIL *CGOL-INVOKING-READTABLE*)))) #+LISPM (PROGN (SETF (SI:RDTBL-NAMES *CGOL-INVOKING-READTABLE*) (list "Cgol or Mlisp Style" "CGOL")) (PUSHNEW *CGOL-INVOKING-READTABLE* SI:*ALL-READTABLES*)) (DEFUN CGOL-INVOKING-READ-MACRO (STREAM INVOKING-CHAR) (CGOLREAD-RAW-WITH-P STREAM INVOKING-CHAR)) (DEFUN CGOL#-READMACRO (STREAM SUBCHAR ARG) SUBCHAR ARG (CGOLREAD-RAW-WITH-P STREAM NIL)) (DEFVAR IT NIL "Used by CGOL as a super-temporary") (defvar token ()) (defvar stringnud ()) (defvar cgolerr () "controls throws for eof condition") (defvar ctoken-table ()) (defvar ret-nud () "The instance variable of a recyled closure") (defun ret-nud () ret-nud) (defvar fun 'TOP-LEVEL) (defvar silence -1) (defvar free-kons ()) (defvar ctyipeek () "keep our own one-character look-ahead.") (defvar cgol-input ()) (defvar *cgol-package* (find-package "CGOL")) (DEFUN CGOLREAD (&REST READ-ARGS) (LET ((*READTABLE* *CGOL-INVOKING-READTABLE*)) (APPLY #'READ READ-ARGS))) (DEFUN CGOLREAD-RAW-WITH-P (CGOL-INPUT PEEK) (TOPLEVEL-PARSE-1 PEEK)) (DEFUN TOPLEVEL-PARSE-1 (CTYIPEEK &AUX ;; State variables. TOKEN STRINGNUD RET-NUD (FUN 'TOP-LEVEL)) (ADVANCE) (PARSE -1)) (DEFUN CGOLERR (MESSAGE LEVEL FATALP) LEVEL FATALP #+LISPM (SI:READ-ERROR "~A IN ~A" MESSAGE FUN) #-LISPM (ERROR MESSAGE)) (defmacro mtyi () '(LET ((C (FUNCALL CGOL-INPUT ':TYI))) (IF (NULL C) -1 C))) (defun ctyi () (IF (NULL CTYIPEEK) (MTYI) (PROG1 CTYIPEEK (SETQ CTYIPEEK ())))) (defun ctyipeek () (IF (NULL CTYIPEEK) (SETQ CTYIPEEK (MTYI)) CTYIPEEK)) (defun cuntyi (c) (SETQ CTYIPEEK C)) (defun cgoltyipeek ()(ctyipeek)) (defun cread () (LET ((*READTABLE* *CREAD-READTABLE*)) (read cgol-input))) ;;; Macros and functions used by the tokenizer loop. (defmacro clookup (x y) `(ASSQ ,X ,Y)) (defmacro return-token (c l &optional (quoted-p 'quoted-p) (reversed-p t)) `(progn ,(if c `(cuntyi ,c)) (return (make-token ,l ,quoted-p ,reversed-p)))) ;; The tokenizer is a simple loop with the character TYI'd pushed on the ;; token buffer after a series of special cases are checked. ;; The boolean state variables could be replaced with predicates ;; that look back on what in in the buffer, however the present implementation ;; is highly straightforward. (defun cgoltoken () (do ((l () (KONS c l)) (c (cskip-whitespace) (ctyi)) (temp) (quoted-p ()) (fixnum-p ()) (flonum-p ()) (expt-p ()) (digit-after-expt-p ()) ) (()) (cond ((= c -1) (if (null l) (cgolerr "EOF encountered inside cgol-exp - CGOLREAD" 2 t) (return-token c l))) ((or (= c #/$) (= c #\ALT)) (if (null l) (return ') (return-token c l))) ((= c #/!) (if (null l) (return (cread)) (return-token c l))) ((= c #/?) (setq quoted-p t) (setq flonum-p ()) (setq fixnum-p ()) (setq c (ctyi))) ((= c #/") (if (null l) (let ((x (ctoken-string))) (setq ret-nud `',x stringnud #'ret-nud) (return x)) (return-token c l))) ((cwhitespacep c) (return-token c l)) ((= c #/.) (cond ((null l) (if (cdigit-p (ctyipeek)) (setq fixnum-p () flonum-p t) (return '/.))) ((null fixnum-p) (return-token c l t)) ('ELSE (if fixnum-p (setq flonum-p t)) (setq fixnum-p ())))) ((and (or (= c #/E) (= c #/e)) flonum-p (not expt-p)) (let ((p (ctyipeek))) (if (not (or (= p #/+) (= p #/-) (cdigit-p p))) (return-token c l))) (setq expt-p t)) ((cdigit-p c) (if (null l) (setq fixnum-p t)) (if expt-p (setq digit-after-expt-p t))) ((and (or (= c #/+) (= c #/-)) flonum-p expt-p (not digit-after-expt-p) (cdigit-p (ctyipeek)))) ((setq temp (clookup (setq c (ICHAR-UPCASE c)) ctoken-table)) (if (null l) (return-token () (KONS c (cfollow-tail (cdr temp))) t ()) (return-token c l))) ('ELSE (setq fixnum-p ()) (setq flonum-p ()))))) (defun cwhitespacep (c) (or (= c #\SP) (= c #\CR) (= c #\LF) (= c #\TAB) (= c #\FF) (= c #/%))) (defun cskip-whitespace () (do ((commentp ())(c)) (()) (setq c (ctyi)) (cond ((= c #/%) (setq commentp (not commentp))) ((cwhitespacep c)) ((NOT COMMENTP) (RETURN C))))) (defun initialize-multi-character-token-table (string) (setq ctoken-table (mapcar #'list (exploden string)))) (defun cfollow-tail (alist) ;; this way of recognizing tokens is taken from the original cgol, ;; is fast and easy and passes all tokens which are subtokens ;; of explicitely defined tokens. (IF (NULL ALIST) () (let ((c (ICHAR-UPCASE (ctyipeek)))) (cond ((setq alist (clookup c alist)) (ctyi) (KONS c (cfollow-tail (cdr alist)))) ('ELSE ()))))) (defun puttok (token) ;; entry point for defining tokens. (let ((l (exploden token))) (or (clookup (car l) ctoken-table) (error "token with illegal first character" token)) (setq ctoken-table (inserttok l ctoken-table)))) (defun inserttok (tok toktable) (if (null tok) toktable (let ((st (clookup (car tok) toktable))) (cond ((null st) (cons (cons (car tok) (inserttok (cdr tok) ())) toktable)) ('ELSE (rplacd st (inserttok (cdr tok) (cdr st))) toktable))))) (defun ctoken-string () ;; this is as in the original cgol, #/? is used to quote ;; #/$ or #/? and #/" is used to quote #/". (do ((c (ctyi) (ctyi)) (l () (KONS c l))) (()) (cond ((or (= c #/$) (= c #\ALT)) ;; a little Dwim. (cgolerr "tokenizer has inserted missing /" before " 0 ()) (return-token c l ())) ((= c #/") (if (= (ctyipeek) #/") (ctyi) (return-token () l ()))) ((and (= c #/?) (or (= (ctyipeek) #/$) (= (ctyipeek) #\ALT))) (setq c (ctyi)))))) (defun cdigit-p (x) (not (or (< x #/0) (> x #/9)))) (DEFUN ICHAR-UPCASE (C) (IF (AND (>= C #/a) (<= C #/z)) (- C #.(- #/a #/A)) C)) (defun make-token (l do-not-try-as-number-p rp) ;; takes the stack of characters and makes a token. (if rp (setq l (nreverse l))) (prog1 (if (or do-not-try-as-number-p (not (ok-as-number-p l))) (implode l) (creadlist l)) (reklaim l))) (defun creadlist (l) (let ((*readtable* *cread-readtable*) (ibase 10)) (readlist l))) (defun ok-as-number-p (l) ;; its more efficient to determine the type of ;; the token by collecting information in state variables ;; as it is read. However we aren't that sure of our book-keeping. (numberp (catch-error (creadlist l) nil))) ;; Keeping our own free-list is a way to use lists for stacks without the ;; overhead of garbage collection. It works everywhere and is a simple add-on, ;; whereas string and fill-pointer hair is not. (defun kons (kar kdr) (if free-kons (PROGN (rplaca free-kons kar) (rplacd (prog1 free-kons (setq free-kons (cdr free-kons))) kdr)) (cons kar kdr))) (defun reklaim (l) (setq free-kons (nconc l free-kons))) ;; Interface functions, and compatibility. (DEFUN FORMCHECK (FORM) ;; check for various obsolete usages. FORM) ;; these functions deal with the package problem in token identity ;; CGOL was written very much pre-package-system. (defun getden (indl) (and (symbolp token) (do ((l indl (cdr l))) ((null l)) (let ((x (get token (car l)))) (and x (return x)) (and (setq x (intern-soft token *cgol-package*)) (setq x (get x (car l))) (return x)))))) (defun pkgcheck (x) (if (symbolp x) (or (intern-soft x *cgol-package*) x))) (DEFUN MAKE_LAMBDA_VALUE (X) #'(LAMBDA () X)) (defun LAND (&REST L) (NOT (MEMQ NIL L))) (DEFUN CGOL-BOOTSTRAP (INPUT-FILENAME OUTPUT-FILENAME &OPTIONAL &KEY PRETTY-PRINT) (LABELS ((VPFORM (X) (COND ((ATOM X)) ((EQ (CAR X) 'PROGN) (DOLIST (F (CDR X)) (VPFORM F))) ((EQ (CAR X) 'DEFUN) (PRINT (CADR X))))) (FPFORM (X STREAM) (COND ((ATOM X)) ((EQ (CAR X) 'QUOTE)) ((EQ (CAR X) 'PROGN) (DOLIST (M (CDR X)) (FPFORM M STREAM))) (PRETTY-PRINT (PPRINT X STREAM) (TERPRI STREAM)) ('ELSE (PRINT X STREAM))))) (WITH-OPEN-FILE (IN INPUT-FILENAME) (WITH-OPEN-FILE (OUT OUTPUT-FILENAME :DIRECTION :OUTPUT) (FORMAT OUT ";;;-*-MODE:LISP;PACKAGE:CGOL;READTABLE:T;BASE:10-*-~2%") (LET ((*PACKAGE* (FIND-PACKAGE "CGOL")) (BASE 10)) (DO ((EOF (LIST NIL)) (FORM)) ((EQ (SETQ FORM (CGOLREAD IN EOF)) EOF)) (VPFORM FORM) (FPFORM FORM OUT) (PRINC ".")))))))