Skip to content
reader.lisp 71.2 KiB
Newer Older
ram's avatar
ram committed
;;; -*- Log: code.log; Package: Lisp -*-
;;;
;;; **********************************************************************
;;; This code was written as part of the CMU Common Lisp project at
;;; Carnegie Mellon University, and has been placed in the public domain.
;;;
(ext:file-comment
  "$Header: src/code/reader.lisp $")
ram's avatar
ram committed
;;; **********************************************************************
;;;
;;; Spice Lisp Reader 
;;; Written by David Dill
;;; Package system interface by Lee Schumacher.
;;; Runs in the standard Spice Lisp environment.
;;;
(export '*ignore-extra-close-parentheses*)

(in-package "LISP")
(export '(readtable readtable-case readtablep *read-base* *readtable*
	  copy-readtable set-syntax-from-char set-macro-character
	  get-macro-character make-dispatch-macro-character
	  set-dispatch-macro-character get-dispatch-macro-character read
	  *read-default-float-format* read-preserving-whitespace
	  read-delimited-list parse-integer read-from-string *read-suppress*
	  reader-error))

ram's avatar
ram committed

;;;Random global variables

(defvar *read-default-float-format* 'single-float "Float format for 1.0E1")
(declaim (type (member short-float single-float double-float long-float)
	       *read-default-float-format*))
ram's avatar
ram committed

(defvar *readtable*)
(declaim (type readtable *readtable*))
(setf (documentation '*readtable* 'variable)
       _N"Variable bound to current readtable.")
ram's avatar
ram committed

(define-condition reader-error (parse-error stream-error)
  ((format-control
    :reader reader-error-format-control
    :initarg :format-control)
   (format-arguments
    :reader reader-error-format-arguments
    :initarg :format-arguments
    :initform '()))
  (:report
   (lambda (condition stream)
     (let ((error-stream (stream-error-stream condition)))
       (when c:*compiler-notification-function*
         (funcall c:*compiler-notification-function* :error
                  (apply #'format nil
                         (reader-error-format-control condition)
                         (reader-error-format-arguments condition))
                  nil error-stream
                  (file-position error-stream)))
       (format stream (intl:gettext "Reader error ~@[at ~D ~]on ~S:~%~?")
	       (file-position error-stream) error-stream
	       (reader-error-format-control condition)
	       (reader-error-format-arguments condition))))))
(define-condition reader-package-error (reader-error) ())

;;; %READ-ERROR  --  Interface
;;;
;;;    Like, signal a READ-ERROR, man...
;;;
(defun %reader-error (stream control &rest args)
  (error 'reader-error :stream stream  :format-control control
	 :format-arguments args))

(define-condition reader-eof-error (end-of-file)
  ((context :reader reader-eof-error-context :initarg :context))
  (:report
   (lambda (condition stream)
     (format stream (intl:gettext "Unexpected EOF on ~S ~A.")
	     (stream-error-stream condition)
	     (reader-eof-error-context condition)))))

(defun reader-eof-error (stream context)
  (error 'reader-eof-error :stream stream  :context context))
  
ram's avatar
ram committed

;;;; Readtable implementation.


(defvar std-lisp-readtable ()
  "Standard lisp readtable. This is for recovery from broken
ram's avatar
ram committed
   read-tables, and should not normally be user-visible.")

;; Max size of the attribute table before we switch from an array to a
;; hash-table.
(defconstant attribute-table-limit
  #-unicode char-code-limit
  #+unicode 256)

(deftype attribute-table ()
  '(simple-array (unsigned-byte 8) (#.attribute-table-limit)))
ram's avatar
ram committed
(defstruct (readtable
	    (:conc-name nil)
	    (:predicate readtablep)
	    (:copier nil)
	    (:print-function
	     (lambda (s stream d)
	       (declare (ignore d))
	       (print-unreadable-object (s stream :identity t)
		 (prin1 'readtable stream)))))
  "Readtable is a data structure that maps characters into syntax
ram's avatar
ram committed
   types for the Common Lisp expression reader."
  ;; The CHARACTER-ATTRIBUTE-TABLE is a vector of ATTRIBUTE-TABLE-LIMIT integers for
  ;; describing the character type.  Conceptually, there are 4 distinct
  ;; "primary" character attributes: WHITESPACE, TERMINATING-MACRO, ESCAPE, and
  ;; CONSTITUENT.  Non-terminating macros (such as the symbol reader) have the
  ;; attribute CONSTITUENT.
  ;;
  ;; In order to make the READ-TOKEN fast, all this information is
  ;; stored in the character attribute table by having different varieties of
  ;; constituents.
  (character-attribute-table
   (make-array attribute-table-limit
	       :element-type '(unsigned-byte 8)
	       :initial-element constituent)
   :type attribute-table)
  ;; The CHARACTER-MACRO-TABLE is a vector of ATTRIBUTE-TABLE-LIMIT functions.  One
  ;; of these functions called with appropriate arguments whenever any
  ;; non-WHITESPACE character is encountered inside READ-PRESERVING-WHITESPACE.
  ;; These functions are used to implement user-defined read-macros, system
  ;; read-macros, and the number-symbol reader.
  (character-macro-table 
   (make-array attribute-table-limit :initial-element #'undefined-macro-char)
   :type (simple-vector #.attribute-table-limit))
  ;;
  ;; DISPATCH-TABLES entry, which is an alist from dispatch characters to
  ;; vectors of CHAR-CODE-LIMIT functions, for use in defining dispatching
  ;; macros (like #-macro).
  (dispatch-tables () :type list)
  (%readtable-case :upcase :type (member :upcase :downcase :preserve :invert))
  ;;
  ;; The CHARACTER-ATTRIBUTE-HASH-TABLE handles the case of char codes
  ;; above ATTRIBUTE-TABLE-LIMIT, since we expect these to be
  ;; relatively rare.
  #+unicode
  (character-attribute-hash-table (make-hash-table)
				  :type (or null hash-table))
  ;;
  ;; The CHARACTER-MACRO-HASH-TABLE handles the case of char codes
  ;; above ATTRIBUTE-TABLE-LIMIT, since we expect these to be
  ;; relatively rare.
  #+unicode
  (character-macro-hash-table (make-hash-table)
			      :type (or null hash-table)))
ram's avatar
ram committed


;;;; Constants for character attributes.  These are all as in the manual.

(eval-when (compile load eval)
  (defconstant whitespace 0)
  (defconstant terminating-macro 1)
  (defconstant escape 2)
  (defconstant constituent 3)
  (defconstant constituent-dot 4)
  (defconstant constituent-expt 5)
  (defconstant constituent-slash 6)
  (defconstant constituent-digit 7)
  (defconstant constituent-sign 8)
ram's avatar
ram committed
  (defconstant multiple-escape 10)
  (defconstant package-delimiter 11)
  ;;fake attribute for use in read-unqualified-token
  (defconstant delimiter 12)
  ;; More fake attributes for used for reading numbers.
  ;;
  ;; The following two are not static but depend on *READ-BASE*.
  ;; DECIMAL-DIGIT is for characters being digits in base 10 but not in
  ;; base *READ-BASE* (which is therefore perforce smaller than 10);
  ;; DIGIT-OR-EXPT is for characters being both exponent markers and
  ;; digits in base *READ-BASE* (which is therefore perforce larger
  ;; than 10).
  
  (defconstant constituent-decimal-digit 13)
  (defconstant constituent-digit-or-expt 14)
  ;; Invalid constituent character
  (defconstant constituent-invalid 15))
ram's avatar
ram committed


;;;; Package specials.

(defvar *old-package* ()
  "Value of *package* at the start of the last read or Nil.")
ram's avatar
ram committed

;;; In case we get an error trying to parse a symbol, we want to rebind the
;;; above stuff so it's cool.

pw's avatar
pw committed
(declaim (special *package* *keyword-package* *read-base*))
ram's avatar
ram committed



;;;; Macros and functions for character tables.

#-unicode
ram's avatar
ram committed
(defmacro get-cat-entry (char rt)
  ;;only give this side-effect-free args.
  `(elt (character-attribute-table ,rt)
    (char-code ,char)))
ram's avatar
ram committed

#+unicode
(defmacro get-cat-entry (char rt)
  ;;only give this side-effect-free args.
  `(if (< (char-code ,char) attribute-table-limit)
       (elt (character-attribute-table ,rt)
	    (char-code ,char))
       (gethash (char-code ,char)
	        (character-attribute-hash-table ,rt)
	        ;; Default is constituent, because the attribute table
	        ;; is initialized to constituent
	        constituent)))

#-unicode
ram's avatar
ram committed
(defun set-cat-entry (char newvalue &optional (rt *readtable*))
  (setf (elt (character-attribute-table rt)
	     (char-code char))
ram's avatar
ram committed
	newvalue))

#+unicode
(defun set-cat-entry (char newvalue &optional (rt *readtable*))
  (let ((code (char-code char)))
    (if (< code attribute-table-limit)
	(setf (elt (character-attribute-table rt)
		   code)
	      newvalue)
	(unless (= newvalue constituent)
	  ;; The default value (in get-cat-entry) is constituent, so
	  ;; don't enter it into the hash table.
	  (setf (gethash code (character-attribute-hash-table rt))
		newvalue)))))

#-unicode
ram's avatar
ram committed
(defmacro get-cmt-entry (char rt)
  `(the function
	(elt (the simple-vector (character-macro-table ,rt))
	     (char-code ,char))))
ram's avatar
ram committed

#+unicode
(defmacro get-cmt-entry (char rt)
   `(if (< (char-code ,char) attribute-table-limit)
       (the function
	 (elt (the simple-vector (character-macro-table ,rt))
	      (char-code ,char)))
       (gethash (char-code ,char)
	        (character-macro-hash-table ,rt)
	        ;; This default value needs to be coordinated with
	        ;; init-std-lisp-readtable.
	        #'read-token)))

#-unicode
ram's avatar
ram committed
(defun set-cmt-entry (char newvalue &optional (rt *readtable*))
  (setf (elt (the simple-vector (character-macro-table rt))
	     (char-code char))
	(coerce newvalue 'function)))
ram's avatar
ram committed

#+unicode
(defun set-cmt-entry (char newvalue &optional (rt *readtable*))
  (let ((code (char-code char)))
    (if (< code attribute-table-limit)
	(setf (elt (the simple-vector (character-macro-table rt))
		   code)
	      (coerce newvalue 'function))
	(let ((f (coerce newvalue 'function)))
	  ;; Don't add an entry if the function would be the same as
	  ;; the default.  This needs to be coordinated with
	  ;; GET-CMT-ENTRY above.
	  (if (eq f #'read-token)
	      f
	      (setf (gethash code (character-macro-hash-table rt))
		    f))))))
(defun undefined-macro-char (stream char)
  (unless *read-suppress*
    (%reader-error stream (intl:gettext "Undefined read-macro character ~S") char)))
ram's avatar
ram committed

;;; The character attribute table is a CHAR-CODE-LIMIT vector of integers. 
ram's avatar
ram committed

(defmacro test-attribute (char whichclass rt)
  `(= (the fixnum (get-cat-entry ,char ,rt)) ,whichclass))
ram's avatar
ram committed

;;; Predicates for testing character attributes

;;; Make this a function, since other people want to use it.
;;;
pw's avatar
pw committed
(declaim (inline whitespacep))
ram's avatar
ram committed
(defun whitespacep (char &optional (rt *readtable*))
  (test-attribute char whitespace rt))

(defmacro constituentp (char &optional (rt '*readtable*))
  `(test-attribute ,char #.constituent ,rt))
ram's avatar
ram committed

(defmacro terminating-macrop (char &optional (rt '*readtable*))
  `(test-attribute ,char #.terminating-macro ,rt))

(defmacro escapep (char &optional (rt '*readtable*))
  `(test-attribute ,char #.escape ,rt))

(defmacro multiple-escape-p (char &optional (rt '*readtable*))
  `(test-attribute ,char #.multiple-escape ,rt))

(defmacro token-delimiterp (char &optional (rt '*readtable*))
  ;;depends on actual attribute numbering above.
  `(<= (get-cat-entry ,char ,rt) #.terminating-macro))



;;;; Secondary attribute table.

;;; There are a number of "secondary" attributes which are constant properties
;;; of characters characters (as long as they are constituents).

(defvar secondary-attribute-table)
(declaim (type attribute-table secondary-attribute-table))
ram's avatar
ram committed

(defun set-secondary-attribute (char attribute)
  (setf (elt secondary-attribute-table (char-code char))
ram's avatar
ram committed
	attribute))


(defun init-secondary-attribute-table ()
  (setq secondary-attribute-table
	(make-array attribute-table-limit
		    :element-type '(unsigned-byte 8)
		    :initial-element #.constituent))
ram's avatar
ram committed
  (set-secondary-attribute #\: #.package-delimiter)
  ;;(set-secondary-attribute #\| #.multiple-escape)	; |) [For EMACS]
ram's avatar
ram committed
  (set-secondary-attribute #\. #.constituent-dot)
  (set-secondary-attribute #\+ #.constituent-sign)
  (set-secondary-attribute #\- #.constituent-sign)
  (set-secondary-attribute #\/ #.constituent-slash)  
  (do ((i (char-code #\0) (1+ i)))
      ((> i (char-code #\9)))
    (set-secondary-attribute (code-char i) #.constituent-digit))
ram's avatar
ram committed
  (set-secondary-attribute #\E #.constituent-expt)
  (set-secondary-attribute #\F #.constituent-expt)
  (set-secondary-attribute #\D #.constituent-expt)
  (set-secondary-attribute #\S #.constituent-expt)
  (set-secondary-attribute #\L #.constituent-expt)
  (set-secondary-attribute #\e #.constituent-expt)
  (set-secondary-attribute #\f #.constituent-expt)
  (set-secondary-attribute #\d #.constituent-expt)
  (set-secondary-attribute #\s #.constituent-expt)
  (set-secondary-attribute #\l #.constituent-expt)
  #+double-double
  (progn
    (set-secondary-attribute #\W #.constituent-expt)
    (set-secondary-attribute #\w #.constituent-expt))
  ;; See CLHS 2.1.4.2 for the list of constituent characters that are
  ;; invalid constituent characters.
  (set-secondary-attribute #\Space #.constituent-invalid)
  (set-secondary-attribute #\Newline #.constituent-invalid)
  (dolist (c '(#\backspace #\tab #\page #\return #\rubout))
    (set-secondary-attribute c #.constituent-invalid)))
ram's avatar
ram committed

#-unicode
ram's avatar
ram committed
(defmacro get-secondary-attribute (char)
  `(elt secondary-attribute-table
    (char-code ,char)))

#+unicode
(defmacro get-secondary-attribute (char)
  `(if (< (char-code ,char) attribute-table-limit)
       (elt secondary-attribute-table
	    (char-code ,char))
       constituent))

ram's avatar
ram committed


;;; Character dispatch stuff

;; For non-unicode stuff, a simple vector of 256 elements works fine.
#-unicode
(defun make-char-dispatch-table ()
  (make-array attribute-table-limit :initial-element #'dispatch-char-error))

#-unicode
(declaim (inline copy-char-dispatch-table))
#-unicode
(defun copy-char-dispatch-table (dispatch)
  (copy-seq dispatch))

;; For unicode, we define a structure to hold a vector and a
;; hash-table to conserve space instead of using a huge vector.  The
;; vector handles the common case for 8-bit characters, and the hash
;; table handles the rest.
#+unicode
(defstruct (char-dispatch-table
	     (:copier nil)
	     (:print-function
	      (lambda (s stream d)
		(declare (ignore d))
		(print-unreadable-object (s stream :identity t :type t)
		  ))))
  ;; TABLE is a vector for quick access for 8-bit characters
  (table (make-array attribute-table-limit :initial-element #'dispatch-char-error)
	 :type (simple-vector #.attribute-table-limit))
  ;; HASH-TABLE is for handling the (presumably) rare case of dispatch
  ;; characters above attribute-table-limit.
  (hash-table (make-hash-table)
	      :type hash-table))

#+unicode
(defun copy-char-dispatch-table (dispatch)
  ;; Make a new dispatch table and copy the contents to it
  (let* ((new (make-char-dispatch-table))
	 (h (char-dispatch-table-hash-table new)))
    (replace (char-dispatch-table-table new)
	     (char-dispatch-table-table dispatch))
    (maphash #'(lambda (key val)
		 (setf (gethash key h) val))
	     (char-dispatch-table-hash-table dispatch))
    new))
    
(declaim (inline get-dispatch-char set-dispatch-char))

#-unicode
(defun get-dispatch-char (char dispatch)
  (elt (the simple-vector dispatch)
       (char-code char)))

#+unicode
(defun get-dispatch-char (char dispatch)
  (let ((code (char-code char)))
    (if (< code attribute-table-limit)
	(elt (the simple-vector (char-dispatch-table-table dispatch))
	     code)
	(gethash char (char-dispatch-table-hash-table dispatch)
		 #'dispatch-char-error))))

#-unicode
(defun set-dispatch-char (char new-value dispatch)
  (setf (elt (the simple-vector dispatch)
	     (char-code char))
	(coerce new-value 'function)))

#+unicode
(defun set-dispatch-char (char new-value dispatch)
  (let ((code (char-code char)))
    (if (< code attribute-table-limit)
	(setf (elt (the simple-vector (char-dispatch-table-table dispatch))
		   (char-code char))
	      (coerce new-value 'function))
	(setf (gethash char (char-dispatch-table-hash-table dispatch))
	      (coerce new-value 'function)))))
ram's avatar
ram committed


;;;; Readtable operations.

Raymond Toy's avatar
Raymond Toy committed
(defun assert-not-standard-readtable (readtable operation)
  (when (eq readtable std-lisp-readtable)
    (cerror "Modify it anyway." 'kernel:standard-readtable-modified-error
	    :operation operation)))

(defun readtable-case (table)
  (%readtable-case table))

(defun (setf readtable-case) (new-case table)
  (assert-not-standard-readtable table '(setf readtable-case))
  (setf (%readtable-case table) new-case))

ram's avatar
ram committed
(defun copy-readtable (&optional (from-readtable *readtable*) to-readtable)
  "A copy is made of from-readtable and place into to-readtable."
Raymond Toy's avatar
Raymond Toy committed
  (assert-not-standard-readtable to-readtable 'copy-readtable)
  (let ((from-readtable (or from-readtable std-lisp-readtable))
	(to-readtable (or to-readtable (make-readtable))))
    (flet ((copy-hash-table (to from)
	     (clrhash to)
	     (maphash #'(lambda (key val)
			  (setf (gethash key to) val))
		      from)))
      ;;physically clobber contents of internal tables.
      (replace (character-attribute-table to-readtable)
	       (character-attribute-table from-readtable))
      (replace (character-macro-table to-readtable)
	       (character-macro-table from-readtable))
      #+unicode
      (progn
	(copy-hash-table (character-attribute-hash-table to-readtable)
			 (character-attribute-hash-table from-readtable))
	(copy-hash-table (character-macro-hash-table to-readtable)
			 (character-macro-hash-table from-readtable)))
      (setf (dispatch-tables to-readtable)
	    (mapcar #'(lambda (pair) (cons (car pair)
					   (copy-char-dispatch-table (cdr pair))))
		    (dispatch-tables from-readtable)))
      (setf (readtable-case to-readtable)
	    (readtable-case from-readtable))
      to-readtable)))
ram's avatar
ram committed

(defun set-syntax-from-char (to-char from-char &optional
				     (to-readtable *readtable*)
				     (from-readtable ()))
  "Causes the syntax of to-char to be the same as from-char in the 
  optional readtable (defaults to the current readtable).  The
  from-table defaults the standard lisp readtable by being nil."
Raymond Toy's avatar
Raymond Toy committed
  (assert-not-standard-readtable to-readtable 'set-syntax-from-char)
  (let ((from-readtable (or from-readtable std-lisp-readtable)))
    ;;copy from-char entries to to-char entries, but make sure that if
    ;;from char is a constituent you don't copy non-movable secondary
    ;;attributes (constituent types), and that said attributes magically
    ;;appear if you transform a non-constituent to a constituent.
    (let ((att (get-cat-entry from-char from-readtable))
	  (mac (get-cmt-entry from-char from-readtable))
	  (from-dpair (find from-char (dispatch-tables from-readtable)
			    :test #'char= :key #'car))
	  (to-dpair (find to-char (dispatch-tables to-readtable)
			  :test #'char= :key #'car)))
      (if (constituentp from-char from-readtable)
	  (setq att (get-secondary-attribute to-char)))
      (set-cat-entry to-char att to-readtable)
      (set-cmt-entry to-char
		     mac
		     to-readtable)
      ;; Copy the reader macro functions too if from-char is a
      ;; dispatching macro character.
      ;;(format t "from-dpair = ~A~%" from-dpair)
      (when from-dpair
	(cond (to-dpair
	       ;; The to-readtable already has a dispatching table for
	       ;; this character.  Replace it with a copy of the
	       ;; dispatching table from from-readtable.
	       (setf (cdr to-dpair) (copy-char-dispatch-table (cdr from-dpair))))
	      (t
	       ;; The to-readtable doesn't have such an entry.  Add a
	       ;; copy of dispatch table from from-readtable to the
	       ;; dispatch table of the to-readtable.
	       (let ((pair (cons to-char (copy-char-dispatch-table (cdr from-dpair)))))
		 (setf (dispatch-tables to-readtable)
		       (push pair (dispatch-tables to-readtable)))))))))
ram's avatar
ram committed

(defun set-macro-character (char function &optional
				 (non-terminatingp nil) (rt *readtable*))
  "Causes char to be a macro character which invokes function when
ram's avatar
ram committed
   seen by the reader.  The non-terminatingp flag can be used to
   make the macro character non-terminating.  The optional readtable
   argument defaults to the current readtable.  Set-macro-character
   returns T."
Raymond Toy's avatar
Raymond Toy committed
  (let ((designated-readtable (or rt std-lisp-readtable)))
    (assert-not-standard-readtable designated-readtable 'set-macro-character)
    (if non-terminatingp
	(set-cat-entry char (get-secondary-attribute char) designated-readtable)
	(set-cat-entry char #.terminating-macro designated-readtable))
    (set-cmt-entry char function designated-readtable))
ram's avatar
ram committed
  T)

(defun get-macro-character (char &optional (rt *readtable*))
  "Returns the function associated with the specified char which is a macro
  character.  The optional readtable argument defaults to the current
  readtable."
  (let ((rt (or rt std-lisp-readtable)))
    ;; Check macro syntax, return associated function if it's there.
    ;; Returns a value for all constituents.
    (cond ((constituentp char rt)
	   (values (get-cmt-entry char rt) t))
	  ((terminating-macrop char rt)
	   (values (get-cmt-entry char rt) nil))
	  (t nil))))
ram's avatar
ram committed


;;;; These definitions support internal programming conventions.

(defconstant eof-object '(*eof*))

(defmacro eofp (char) `(eq ,char eof-object))

(defun flush-whitespace (stream)
dtc's avatar
dtc committed
  ;; This flushes whitespace chars, returning the last char it read (a
  ;; non-white one).  It always gets an error on end-of-file.
  (let ((stream (in-synonym-of stream)))
toy's avatar
toy committed
    (stream-dispatch stream
      ;; simple-stream
      (do ((char (stream::%read-char stream t nil t t)
toy's avatar
toy committed
		 (stream::%read-char stream t nil t t)))
	  ((not (test-attribute char #.whitespace *readtable*))
toy's avatar
toy committed
	   char))
      ;; lisp-stream
      (prepare-for-fast-read-char stream
	(do ((char (fast-read-char t) (fast-read-char t)))
	    ((not (test-attribute char #.whitespace *readtable*))
toy's avatar
toy committed
	     (done-with-fast-read-char)
	     char)))
      ;; fundamental-stream
      (do ((char (stream-read-char stream) (stream-read-char stream)))
toy's avatar
toy committed
	  ((or (eq char :eof)
	       (not (test-attribute char #.whitespace *readtable*)))
toy's avatar
toy committed
	   (if (eq char :eof)
	       (error 'end-of-file :stream stream)
	       char))))))
ram's avatar
ram committed


;;;; Temporary initialization hack.

(defun init-std-lisp-readtable ()
  (setq std-lisp-readtable (make-readtable))
  ;;all characters default to "constituent" in make-readtable
  ;;*** un-constituent-ize some of these ***
Raymond Toy's avatar
Raymond Toy committed
  (handler-bind
      ((standard-readtable-modified-error
	(lambda (c)
	  (declare (ignore c))
	  ;; Of course, we want to be able to modify the standard
	  ;; readtable here!
	  (invoke-restart 'kernel::continue))))
    (let ((*readtable* std-lisp-readtable)
	  (*assert-not-standard-readtable* nil))
      (set-cat-entry #\tab #.whitespace)
      (set-cat-entry #\linefeed #.whitespace)  
      (set-cat-entry #\space #.whitespace)
      (set-cat-entry #\page #.whitespace)
      (set-cat-entry #\return #.whitespace)
      (set-cat-entry #\\ #.escape)
      (set-cat-entry #\| #.multiple-escape)
      (set-cmt-entry #\\ #'read-token)
      (set-cmt-entry #\: #'read-token)
      (set-cmt-entry #\| #'read-token)
      ;;macro definitions
      (set-macro-character #\" #'read-string)
      ;;* # macro
      (set-macro-character #\' #'read-quote)
      (set-macro-character #\( #'read-list)
      (set-macro-character #\) #'read-right-paren)
      (set-macro-character #\; #'read-comment)
      ;;* backquote
      ;;all constituents
      (do ((ichar 0 (1+ ichar))
	   (len #+unicode-bootstrap #o200
		#-unicode-bootstrap char-code-limit))
	  ((= ichar len))
	(let ((char (code-char ichar)))
	  #-unicode
	  (when (constituentp char std-lisp-readtable)
	    (set-cat-entry char (get-secondary-attribute char))
	    (set-cmt-entry char #'read-token))
	  #+unicode
	  (cond ((constituentp char std-lisp-readtable)
		 (set-cat-entry char (get-secondary-attribute char))
		 (when (< ichar attribute-table-limit)
		   ;; The hashtable default in get-cmt-entry returns
		   ;; #'read-token, so don't need to set it here.
		   (set-cmt-entry char #'read-token)))
		((>= ichar attribute-table-limit)
		 ;; A non-constituent character that would be stored in
		 ;; the hash table gets #'undefined-macro-char.
		 (set-cmt-entry char #'undefined-macro-char))))))))
ram's avatar
ram committed



;;;; read-buffer implementation.

gerd's avatar
gerd committed
(defvar *read-buffer*)
(defvar *read-buffer-length*)
ram's avatar
ram committed

gerd's avatar
gerd committed
(defvar *inch-ptr*)
(defvar *ouch-ptr*)
ram's avatar
ram committed

gerd's avatar
gerd committed
(declaim (type index *read-buffer-length* *inch-ptr* *ouch-ptr*))
(declaim (simple-string *read-buffer*))

(defconstant +read-buffer-pool-size+ 16)
(defconstant +read-buffer-initial-size+ 16)

(defun make-read-buffer-stack (size count)
  (let ((stack (make-array count :fill-pointer 0)))
    (dotimes (i count)
      (vector-push (make-string size) stack))
    stack))

(defvar *read-buffer-stack*)

(defun init-read-buffer-stack ()
  (setq *read-buffer-stack*
	(make-read-buffer-stack +read-buffer-initial-size+ +read-buffer-pool-size+)))

(defun allocate-read-buffer ()
  (cond ((zerop (fill-pointer *read-buffer-stack*))
	 (make-string 32))
	(t (vector-pop  *read-buffer-stack*))))

(defun free-read-buffer (buffer)
  (vector-push buffer *read-buffer-stack*))

;;; Recursive reader functions use with-read-buffer to allocate a
;;; fresh buffer.  We currently allocate a fresh buffer only for the
;;; exported functions READ, READ-PRESERVING-WHITESPACE,
;;; READ-FROM-STRING, and READ-DELIMITED-LIST.  Some internal
;;; functions like READ-TOKEN, INTERNAL-READ-EXTENDED-TOKEN and
;;; READ-STRING avoid the overhead for the allocation and clobber the
;;; current read-buffer.  

(defmacro with-read-buffer (() &body body)
  "Bind *read-buffer* to a fresh buffer and execute Body."
gerd's avatar
gerd committed
  `(let* ((*read-buffer* (allocate-read-buffer))
	  (*read-buffer-length* (length *read-buffer*))
	  (*ouch-ptr* 0)
	  (*inch-ptr* 0))
    (unwind-protect (progn ,@body)
      (free-read-buffer *read-buffer*))))
ram's avatar
ram committed
(defmacro reset-read-buffer ()
gerd's avatar
gerd committed
  ;;turn *read-buffer* into an empty read-buffer.
  ;;*ouch-ptr* always points to next char to write
ram's avatar
ram committed
  `(progn
gerd's avatar
gerd committed
    (setq *ouch-ptr* 0)
    ;;*inch-ptr* always points to next char to read
    (setq *inch-ptr* 0)))
ram's avatar
ram committed

(defmacro ouch-read-buffer (char)
  `(progn
gerd's avatar
gerd committed
    (if (>= (the fixnum *ouch-ptr*)
	    (the fixnum *read-buffer-length*))
ram's avatar
ram committed
	;;buffer overflow -- double the size
	(grow-read-buffer))
gerd's avatar
gerd committed
    (setf (elt (the simple-string *read-buffer*) *ouch-ptr*) ,char)
    (setq *ouch-ptr* (1+ *ouch-ptr*))))

;; macro to move *ouch-ptr* back one.
ram's avatar
ram committed
(defmacro ouch-unread-buffer ()
gerd's avatar
gerd committed
  '(if (> (the fixnum *ouch-ptr*) (the fixnum *inch-ptr*))
       (setq *ouch-ptr* (1- (the fixnum *ouch-ptr*)))))
ram's avatar
ram committed

(defun grow-read-buffer ()
gerd's avatar
gerd committed
  (let ((rbl (length (the simple-string *read-buffer*))))
ram's avatar
ram committed
    (declare (fixnum rbl))
gerd's avatar
gerd committed
    (setq *read-buffer*
ram's avatar
ram committed
	  (concatenate 'simple-string
gerd's avatar
gerd committed
		       (the simple-string *read-buffer*)
ram's avatar
ram committed
		       (the simple-string (make-string rbl))))
gerd's avatar
gerd committed
    (setq *read-buffer-length* (* 2 rbl))))
ram's avatar
ram committed

(defun inchpeek-read-buffer ()
gerd's avatar
gerd committed
  (if (>= (the fixnum *inch-ptr*) (the fixnum *ouch-ptr*))
ram's avatar
ram committed
      eof-object
gerd's avatar
gerd committed
      (elt (the simple-string *read-buffer*) *inch-ptr*)))
ram's avatar
ram committed

(defun inch-read-buffer ()
gerd's avatar
gerd committed
  (cond ((>= (the fixnum *inch-ptr*) (the fixnum *ouch-ptr*))
ram's avatar
ram committed
	 eof-object)
gerd's avatar
gerd committed
	(t (prog1 (elt (the simple-string *read-buffer*) *inch-ptr*)
		  (setq *inch-ptr* (1+ (the fixnum *inch-ptr*)))))))
ram's avatar
ram committed

(defmacro unread-buffer ()
gerd's avatar
gerd committed
  `(decf (the fixnum *inch-ptr*)))
ram's avatar
ram committed

(defun read-unwind-read-buffer ()
  ;;keep contents, but make next (inch..) return first char.
gerd's avatar
gerd committed
  (setq *inch-ptr* 0))
ram's avatar
ram committed

(defun read-buffer-to-string ()
gerd's avatar
gerd committed
  (subseq (the simple-string *read-buffer*) 0 *ouch-ptr*))
ram's avatar
ram committed



;;;; READ-PRESERVING-WHITESPACE, READ-DELIMITED-LIST, and READ.

(defvar *ignore-extra-close-parentheses* t
  "If true, only warn when there is an extra close paren, otherwise error.")
ram's avatar
ram committed

pw's avatar
pw committed
(declaim (special *standard-input*))

;;; The SHARP-EQUAL function introduces a label for an object, and the
;;; SHARP-SHARP function allows us to refer to this label.  This is slightly
;;; tricky because the object we are labelling can contain labels to itself in
;;; order to create circular structures, e.g., #1=(cons 'hello #1#).
;;;
;;; The SHARP-EQUAL function is called when we are reading a stream and
;;; encounter text of the form #<LABEL>=<OBJ>, where <LABEL> should be a number
;;; and <OBJ> ought to be readable as an object.  Our first step is to use
;;; gensym to create a new <TAG> for this label; we temporarily bind <LABEL> to
;;; <TAG> and then read in <OBJ> using our <TAG> as a temporary binding for
;;; <LABEL>.  Finally, we fix the <OBJ> by replacing any occurrences of <TAG>
;;; with a pointer to <OBJ> itself, creating the circular structures.
;;;
;;; We now do this with a couple of data structures.
;;;
;;; 1.  *SHARP-EQUAL-FINAL-TABLE* is a hash table where "finished" associations
;;;     are stored.  That is, it is a hash table from labels to objects, where
;;;     the objects have already been patched and are tag-free.
;;;
;;; 2.  *SHARP-EQUAL-TEMP-TABLE* is a hash table where "unfinished"
;;;     associations are stored.  That is, it is a hash table from labels to
;;;     tags.
;;;
;;; 3.  *SHARP-EQUAL-REPL-TABLE* is a hash table that associates tags with
;;;     their corrective pointers.  That is, this is the table we use to
;;;     "patch" the objects.

(defvar *sharp-equal-final-table*)
(defvar *sharp-equal-temp-table*)
(defvar *sharp-equal-repl-table*)


ram's avatar
ram committed
 
;;; READ-PRESERVING-WHITESPACE behaves just like read only it makes sure
;;; to leave terminating whitespace in the stream.
;;;
(defun read-preserving-whitespace (&optional (stream *standard-input*)
gerd's avatar
gerd committed
				   (eof-errorp t) (eof-value nil)
				   (recursivep nil))
  "Reads from stream and returns the object read, preserving the whitespace
gerd's avatar
gerd committed
  (with-read-buffer ()
    (read-preserving-whitespace-internal stream eof-errorp eof-value recursivep)))

(defun read-preserving-whitespace-internal (&optional (stream *standard-input*)
					    (eof-errorp t) (eof-value nil)
					    (recursivep nil))
  
gerd's avatar
gerd committed
    (recursivep
     ;; Loop for repeating when a macro returns nothing.
     (loop
      (let ((char (read-char stream eof-errorp eof-object)))
	(cond ((eofp char) (return eof-value))
	      ((whitespacep char))
	      (t
	       (let* ((macrofun (get-cmt-entry char *readtable*))
		      (result (multiple-value-list
			       (funcall macrofun stream char))))
		 ;; Repeat if macro returned nothing.
		 (if result (return (if *read-suppress*
					nil
					(car result))))))))))
gerd's avatar
gerd committed
    (t
     (let ((*sharp-equal-final-table* nil)
	   (*sharp-equal-temp-table* nil)
	   (*sharp-equal-repl-table* nil))
gerd's avatar
gerd committed
       (read-preserving-whitespace-internal stream eof-errorp eof-value t)))))
ram's avatar
ram committed

(defun read-maybe-nothing (stream char)
  ;;returns nil or a list with one thing, depending.
  ;;for functions that want comments to return so they can look
  ;;past them.  Assumes char is not whitespace.
  (let ((retval (multiple-value-list
		 (funcall (get-cmt-entry char *readtable*) stream char))))
    (if retval (rplacd retval nil))))

(defun read (&optional (stream *standard-input*) (eof-errorp t)
		       (eof-value ()) (recursivep ()))
  "Reads in the next object in the stream, which defaults to
   *standard-input*. For details see the I/O chapter of
   the manual."
gerd's avatar
gerd committed
  (with-read-buffer ()
    (read-internal stream eof-errorp eof-value recursivep)))

(defun read-internal (&optional (stream *standard-input*) (eof-errorp t)
		      (eof-value ()) (recursivep ()))
gerd's avatar
gerd committed
      (read-preserving-whitespace-internal stream eof-errorp eof-value recursivep)
ram's avatar
ram committed
    (let ((whitechar (read-char stream nil eof-object)))
      (if (and (not (eofp whitechar))
	       (or (not (whitespacep whitechar))
		   recursivep))
	  (unread-char whitechar stream)))))

(defun read-delimited-list (endchar &optional
				    (input-stream *standard-input*)
				    recursive-p)
  "Reads objects from input-stream until the next character after an
ram's avatar
ram committed
   object's representation is endchar.  A list of those objects read
   is returned."
  (declare (ignore recursive-p))
gerd's avatar
gerd committed
  (with-read-buffer ()
    (do ((char (flush-whitespace input-stream)
	       (flush-whitespace input-stream))
	 (retlist ()))
	((char= char endchar) (nreverse retlist))
      (setq retlist (nconc (read-maybe-nothing input-stream char) retlist)))))
ram's avatar
ram committed


;;;; Standard ReadMacro definitions to implement the reader.

(defun read-quote (stream ignore)
  (declare (ignore ignore))
gerd's avatar
gerd committed
  (list 'quote (read-internal stream t nil t)))
ram's avatar
ram committed

(defun read-comment (stream ignore)
  (declare (ignore ignore))
  (let ((stream (in-synonym-of stream)))
toy's avatar
toy committed
    (stream-dispatch stream
      ;; simple-stream
      (do ((char (stream::%read-char stream nil nil t t)
		 (stream::%read-char stream nil nil t t)))
	  ((or (not char) (char= char #\newline))))
      ;; lisp-stream
      (prepare-for-fast-read-char stream
        (do ((char (fast-read-char nil nil)
		   (fast-read-char nil nil)))
	    ((or (not char) (char= char #\newline))
	     (done-with-fast-read-char))))
      ;; fundamental-stream
      (do ((char (stream-read-char stream) (stream-read-char stream)))
	  ((or (eq char :eof) (char= char #\newline))))))
  ;; don't return anything
ram's avatar
ram committed
  (values))

(defun read-list (stream ignore)
  (declare (ignore ignore))
  (let* ((thelist (list nil))
	 (listtail thelist))
    (do ((firstchar (flush-whitespace stream) (flush-whitespace stream)))
	((char= firstchar #\) ) (cdr thelist))
      (when (char= firstchar #\.)
	    (let ((nextchar (read-char stream t)))
	      (cond ((token-delimiterp nextchar)
		     (cond ((eq listtail thelist)
			    (if *read-suppress*
				(return-from read-list nil)
				(%reader-error stream _"Nothing appears before . in list.")))
ram's avatar
ram committed
			   ((whitespacep nextchar)
			    (setq nextchar (flush-whitespace stream))))
		     (rplacd listtail
			     ;;return list containing last thing.
			     (car (read-after-dot stream nextchar)))
		     (return (cdr thelist)))
		    ;;put back nextchar so we can read it normally.
		    (t (unread-char nextchar stream)))))
      ;;next thing is not an isolated dot.
      (let ((listobj (read-maybe-nothing stream firstchar)))
	;;allows the possibility that a comment was read.
	(when listobj
	      (rplacd listtail listobj)
	      (setq listtail listobj))))))

(defun read-after-dot (stream firstchar)
  ;;firstchar is non-whitespace!
  (let ((lastobj ()))
    (do ((char firstchar (flush-whitespace stream)))
	((char= char #\) )
	 (%reader-error stream _"Nothing appears after . in list."))
ram's avatar
ram committed
      ;;see if there's something there.
      (setq lastobj (read-maybe-nothing stream char))
      (when lastobj (return t)))
    ;;at least one thing appears after the dot.
    ;;check for more than one thing following dot.
    (do ((lastchar (flush-whitespace stream)
		   (flush-whitespace stream)))
	((char= lastchar #\) ) lastobj)	;success!
      ;;try reading virtual whitespace
      (if (read-maybe-nothing stream lastchar)
	  (%reader-error stream _"More than one object follows . in list.")))))
ram's avatar
ram committed

(defun read-string (stream closech)
  ;;this accumulates chars until it sees same char that invoked it.
  ;;for a very long string, this could end up bloating the read buffer.
  (reset-read-buffer)
  (let ((stream (in-synonym-of stream)))
toy's avatar
toy committed
    (stream-dispatch stream
      ;; simple-stream
      (do ((char (stream::%read-char stream t nil t t)
		 (stream::%read-char stream t nil t t)))
	  ((char= char closech))
	(if (escapep char) (setq char (stream::%read-char stream t nil t t)))
	(ouch-read-buffer char))
      ;; lisp-stream
      (prepare-for-fast-read-char stream
        (do ((char (fast-read-char t) (fast-read-char t)))
	    ((char= char closech)
	     (done-with-fast-read-char))
	  (if (escapep char) (setq char (fast-read-char t)))
	  (ouch-read-buffer char)))
      ;; fundamental-stream
      (do ((char (stream-read-char stream) (stream-read-char stream)))
	  ((or (eq char :eof) (char= char closech))
	   (if (eq char :eof)
	       (error 'end-of-file :stream stream)))
	(when (escapep char)
	  (setq char (stream-read-char stream))
	  (if (eq char :eof)
	      (error 'end-of-file :stream stream)))
	(ouch-read-buffer char))))
ram's avatar
ram committed
  (read-buffer-to-string))

(defun read-right-paren (stream ignore)
  (declare (ignore ignore))
    (cond (*ignore-extra-close-parentheses*
	   (warn _"Ignoring unmatched close parenthesis~
		  ~@[ at file position ~D~]."
		 (file-position stream))
	   (values))
	  (t
	   (%reader-error stream _"Unmatched close parenthesis."))))