--- /dev/null
+;;; -*- Mode: Lisp ; Base: 10 ; Syntax: ANSI-Common-Lisp -*-
+;;; csv: reading files in Comma-Separated Values format.
+
+#| "
+HOME PAGE:
+ http://www.cliki.net/fare-csv
+
+LICENSE:
+ http://www.geocities.com/SoHo/Cafe/5947/bugroff.html
+ Also under no-restriction BSD license for those who insist.
+
+DEPENDENCIES:
+ apt-get install cl-asdf
+
+USAGE:
+ (asdf:load-system :fare-csv) ;; or (asdf:oos 'asdf:load-op :fare-csv) if using an old asdf
+ (read-csv-line)
+ (read-csv-stream s)
+ (read-csv-file "foo.csv")
+
+EXAMPLE USE:
+ ...
+
+BUGS:
+ I implemented just enough of CSV to import a specific file
+ from a PC application that will remain unnamed.
+ If you need more, you can cont(r)act me, and/or hack it yourself.
+
+ CSV is intrinsically an underspecified lossy format,
+ and the particular PC application I'm using loses heavily
+ (i.e. no quoting convention at all, not even a pascal-like one)
+ when text fields contain the quote character. Ouch.
+
+SEE ALSO:
+ This spec seems to explain popular usage, is refered by docs below.
+ http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
+
+ This one says about the same:
+ http://edoceo.com/utilitas/csv-file-format
+
+ There's now an RFC that tries to standardize CSV:
+ http://www.rfc-editor.org/rfc/rfc4180.txt
+
+ Here's what Perl hackers think CSV is:
+ http://search.cpan.org/~hmbrand/Text-CSV_XS-0.59/CSV_XS.pm
+
+
+Share and enjoy!
+" |#
+
+; -----------------------------------------------------------------------------
+;;; Packaging stuff
+
+(in-package :fare-csv)
+
+; -----------------------------------------------------------------------------
+;;; Optimization
+(eval-when (:compile-toplevel)
+ (declaim (optimize (speed 3) (safety 1) (debug 3))))
+
+; -----------------------------------------------------------------------------
+;;; Thin compatibility layer
+#| ;;; Not needed anymore
+(eval-when (:compile-toplevel :load-toplevel :execute)
+ (unless (fboundp 'parse-number)
+ (defun parse-number (s)
+ (with-standard-io-syntax ()
+ (let* ((*read-eval* nil)
+ (*read-default-float-format* 'double-float)
+ (n (read-from-string s)))
+ (if (numberp n) n)))))) |#
+
+; -----------------------------------------------------------------------------
+;;; Parameters
+
+(eval-when (:compile-toplevel :load-toplevel)
+ (defparameter +cr+ #.(format nil "~A" #\Return))
+ (defparameter +lf+ #.(format nil "~A" #\Linefeed))
+ (defparameter +crlf+ #.(format nil "~A~A" #\Return #\Linefeed))
+ (defparameter *csv-variables* '())
+ (defparameter *csv-rfc4180-values* '())
+ (defparameter *csv-creativyst-values* '())
+ (macrolet
+ ((def (var rfc4180 creativyst doc)
+ (push var *csv-variables*)
+ (push creativyst *csv-creativyst-values*)
+ (push rfc4180 *csv-rfc4180-values*)
+ `(defparameter ,var ,creativyst ,doc)))
+ (def *separator*
+ #\, #\,
+ "Separator between CSV fields")
+ (def *quote*
+ #\" #\"
+ "delimiter of string data; pascal-like quoted as double itself in a string.")
+ (def *unquoted-quotequote*
+ nil nil
+ "does a pair of quotes represent a quote outside of quotes?
+M$, RFC says NIL, csv.3tcl says T")
+ (def *loose-quote*
+ nil nil
+ "can quotes appear anywhere in a field?")
+ (def *allow-binary*
+ t t
+ "do we accept non-ascii data?")
+ (def *keep-meta-info*
+ nil nil
+ "when parsing, include meta information?")
+ (def *eol*
+ +lf+ +crlf+
+ "line ending when exporting CSV")
+ (def *line-endings*
+ (list +crlf+ +lf+) (list +cr+ +lf+ +crlf+)
+ "acceptable line endings when importing CSV")
+ (def *skip-whitespace*
+ nil t
+ "shall we skip unquoted whitespace around separators?")))
+
+(defun char-ascii-text-p (c)
+ (<= #x20 (char-code c) #x7E))
+
+(defmacro with-creativyst-csv-syntax (() &body body)
+ `(call-with-creativyst-csv-syntax (lambda () ,@body)))
+(defun call-with-creativyst-csv-syntax (thunk)
+ (progv *csv-variables* *csv-creativyst-values*
+ (funcall thunk)))
+
+(defmacro with-rfc4180-csv-syntax (() &body body)
+ `(call-with-rfc4180-csv-syntax (lambda () ,@body)))
+(defun call-with-rfc4180-csv-syntax (thunk)
+ (progv *csv-variables* *csv-rfc4180-values*
+ (funcall thunk)))
+
+(defmacro with-strict-rfc4180-csv-syntax (() &body body)
+ `(call-with-strict-rfc4180-csv-syntax (lambda () ,@body)))
+(defun call-with-strict-rfc4180-csv-syntax (thunk)
+ (progv *csv-variables* *csv-rfc4180-values*
+ (setf *line-endings* (list +crlf+)
+ *allow-binary* nil)
+ (funcall thunk)))
+
+(defun valid-eol-p (x)
+ (member x (list +cr+ +lf+ +crlf+) :test #'equal))
+
+(defun validate-csv-parameters ()
+ (assert (typep *separator* 'character) ())
+ (assert (typep *quote* 'character) ())
+ (assert (not (eql *separator* *quote*)) ())
+ (assert (typep *unquoted-quotequote* 'boolean) ())
+ (assert (typep *loose-quote* 'boolean) ())
+ (assert (typep *keep-meta-info* 'boolean) ())
+ (assert (valid-eol-p *eol*) ())
+ (assert (not (member (aref *eol* 0) (list *separator* *quote*))) ())
+ (assert (and *line-endings* (every #'valid-eol-p *line-endings*)) ())
+ (assert (typep *skip-whitespace* 'boolean) ()))
+
+
+;; For internal use only
+(defvar *accept-cr* t "internal: do we accept cr?")
+(defvar *accept-lf* t "internal: do we accept lf?")
+(defvar *accept-crlf* t "internal: do we accept crlf?")
+
+; -----------------------------------------------------------------------------
+;;; The parser
+
+(defmacro defsubst (name arglist &body body)
+ "Declare an inline defun."
+ `(progn (declaim (inline ,name))
+ (defun ,name ,arglist ,@body)))
+
+(defsubst char-space-p (c)
+ "Is character C some kind of white space?
+BUG: this only handles a tiny subset of character sets,
+even if restricted to ASCII. However, it's rather portable."
+ (declare (type (or null character) c))
+ (and c (member c '(#\Space #\Tab))))
+
+;;#+DEBUG (defparameter *max* 2000)
+;;#+DEBUG (defun maxbreak () (when (<= *max* 0) (setf *max* 2000) (break)) (decf *max*))
+
+(defsubst accept-p (x s)
+ (let ((c (peek-char nil s nil nil)))
+ ;;#+DEBUG (format t "~&Current char: ~S~%" c)
+ ;;#+DEBUG (maxbreak)
+ (etypecase x
+ (character (eql x c))
+ ((or function symbol) (funcall x c))
+ (integer (eql x (char-code c))))))
+
+(defsubst accept (x s)
+ (and (accept-p x s)
+ (read-char s)))
+
+(defsubst accept-eof (s)
+ (not (peek-char nil s nil nil)))
+
+(defsubst accept-eol (s)
+ (block nil
+ (when (and *accept-lf* (accept #\Linefeed s)) (return t))
+ (when (or *accept-crlf* *accept-cr*)
+ (when (accept #\Return s)
+ (when *accept-crlf*
+ (if (accept #\Linefeed s)
+ (return t)
+ (unless *accept-cr*
+ (error "Carriage-return without Linefeed!"))))
+ (return t)))
+ nil))
+
+(defsubst accept-space (s)
+ (accept #'char-space-p s))
+
+(defsubst accept-spaces (s)
+ (loop for x = (accept-space s)
+ while x
+ collect x))
+
+(defsubst accept-quote (s)
+ (accept *quote* s))
+
+(defsubst accept-separator (s)
+ (accept *separator* s))
+
+(defun read-csv-line (s)
+ (validate-csv-parameters)
+ (let ((ss (make-string-output-stream))
+ (fields '())
+ (had-quotes nil)
+ ;;(had-spaces nil)
+ ;;(had-binary nil)
+ (*accept-cr* (member +cr+ *line-endings* :test #'equal))
+ (*accept-lf* (member +lf+ *line-endings* :test #'equal))
+ (*accept-crlf* (member +crlf+ *line-endings* :test #'equal)))
+ (labels
+ ((do-fields ()
+ ;;#+DEBUG (format t "~&do-field~%")
+ (setf had-quotes nil)
+ (when *skip-whitespace*
+ (accept-spaces s))
+ ;;#+DEBUG (format t "~&do-field, after spaces~%")
+ (cond
+ ((or (accept-eol s) (accept-eof s))
+ (done))
+ (t
+ (do-field-start))))
+ (do-field-start ()
+ ;;#+DEBUG (format t "~&do-field-start~%")
+ (cond
+ ((accept-separator s)
+ (add "") (do-fields))
+ ((accept-quote s)
+ (cond
+ ((and *unquoted-quotequote* (accept-quote s))
+ (add-char *quote*) (do-field-unquoted))
+ (t
+ (do-field-quoted))))
+ (t
+ (do-field-unquoted))))
+ (do-field-quoted ()
+ ;;#+DEBUG (format t "~&do-field-quoted~%")
+ (setf had-quotes t)
+ (cond
+ ((accept-eof s)
+ (error "unexpected end of stream in quotes"))
+ ((accept-quote s)
+ (cond
+ ((accept-quote s)
+ (quoted-field-char *quote*))
+ (*loose-quote*
+ (do-field-unquoted))
+ (t
+ (add (current-string))
+ (end-of-field))))
+ (t
+ (quoted-field-char (read-char s)))))
+ (quoted-field-char (c)
+ ;;#+DEBUG (format t "~"ed-field-char~%")
+ (add-char c)
+ (do-field-quoted))
+ (do-field-unquoted ()
+ ;;#+DEBUG (format t "~&do-field-unquoted~%")
+ (if *skip-whitespace*
+ (let ((spaces (accept-spaces s)))
+ (cond
+ ((accept-separator s)
+ (add (current-string))
+ (do-fields))
+ ((or (accept-eol s) (accept-eof s))
+ (add (current-string))
+ (done))
+ (t
+ (loop for x in spaces do (add-char x))
+ (do-field-unquoted-no-skip))))
+ (do-field-unquoted-no-skip)))
+ (do-field-unquoted-no-skip ()
+ ;;#+DEBUG (format t "~&do-field-unquoted-no-skip~%")
+ (cond
+ ((accept-separator s)
+ (add (current-string))
+ (do-fields))
+ ((or (accept-eol s) (accept-eof s))
+ (add (current-string))
+ (done))
+ ((accept-quote s)
+ (cond
+ ((and *unquoted-quotequote* (accept-quote s))
+ (add-char *quote*) (do-field-unquoted))
+ (*loose-quote*
+ (do-field-quoted))
+ (t
+ (error "unexpected quote in middle of field"))))
+ (t
+ (add-char (read-char s))
+ (do-field-unquoted))))
+ (end-of-field ()
+ ;;#+DEBUG (format t "~&end-of-field~%")
+ (when *skip-whitespace*
+ (accept-spaces s))
+ (cond
+ ((or (accept-eol s) (accept-eof s))
+ (done))
+ ((accept-separator s)
+ (do-fields))
+ (t
+ (error "end of field expected"))))
+ (add (x)
+ ;;#+DEBUG (format t "~&add ~S~%" x)
+ (push
+ (if *keep-meta-info*
+ (list x :quoted had-quotes)
+ x)
+ fields))
+ (add-char (c)
+ ;;#+DEBUG (format t "~&add-char ~S~%" c)
+ (write-char c ss))
+ (current-string ()
+ (get-output-stream-string ss))
+ (done ()
+ ;;#+DEBUG (format t "~&done ~S~%" fields)
+ (nreverse fields)))
+ (do-fields))))
+
+(defun read-csv-stream (s)
+ (loop until (accept-eof s)
+ collect (read-csv-line s)))
+
+(defun read-csv-file (pathname)
+ (with-open-file (s pathname :direction :input :if-does-not-exist :error)
+ (read-csv-stream s)))
+
+(defun char-needs-quoting (x)
+ (or (eql x *quote*)
+ (eql x *separator*)
+ (not (char-ascii-text-p x))))
+
+(defun string-needs-quoting (x)
+ (and (not (zerop (length x)))
+ (or (char-space-p (char x 0))
+ (char-space-p (char x (1- (length x))))
+ (some #'char-needs-quoting x))
+ t))
+
+(defun write-csv-lines (lines stream)
+ "Write many CSV line to STREAM."
+ (dolist (x lines)
+ (write-csv-line x stream)))
+
+(defun write-csv-line (fields stream)
+ "Write one CSV line to STREAM."
+ (loop for x on fields
+ while x
+ do
+ (write-csv-field (first x) stream)
+ (when (cdr x)
+ (write-char *separator* stream)))
+ (write-string *eol* stream))
+
+(defun write-csv-field (field stream)
+ (etypecase field
+ (null t)
+ (number (princ field stream))
+ (string (write-csv-string-safely field stream))
+ (symbol (write-csv-string-safely (symbol-name field) stream))))
+
+(defun write-csv-string-safely (string stream)
+ (if (string-needs-quoting string)
+ (write-quoted-string string stream)
+ (write-string string stream)))
+
+(defun write-quoted-string (string stream)
+ (write-char *quote* stream)
+ (loop for c across string do
+ (when (char= c *quote*)
+ (write-char c stream))
+ (write-char c stream))
+ (write-char *quote* stream))
+
+;(trace read-csv-line read-csv-stream)
+
+;;#+DEBUG (write (read-csv-file "test.csv"))
+;;#+DEBUG (progn (setq *separator* #\;) (write (read-csv-file "/samba/ciev.csv")))