Skip to content
util.lisp 8.18 KiB
Newer Older
Hans Huebner's avatar
Hans Huebner committed
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-PPCRE; Base: 10 -*-
Edi Weitz's avatar
Edi Weitz committed
;;; $Header: /usr/local/cvsrep/cl-ppcre/util.lisp,v 1.48 2009/10/28 07:36:15 edi Exp $
Hans Huebner's avatar
Hans Huebner committed

;;; Utility functions and constants dealing with the character sets we
;;; use to encode character classes
Hans Huebner's avatar
Hans Huebner committed

Edi Weitz's avatar
Edi Weitz committed
;;; Copyright (c) 2002-2009, Dr. Edmund Weitz. All rights reserved.
Hans Huebner's avatar
Hans Huebner committed

;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:

;;;   * Redistributions of source code must retain the above copyright
;;;     notice, this list of conditions and the following disclaimer.

;;;   * Redistributions in binary form must reproduce the above
;;;     copyright notice, this list of conditions and the following
;;;     disclaimer in the documentation and/or other materials
;;;     provided with the distribution.

;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Edi Weitz's avatar
Edi Weitz committed
(in-package :cl-ppcre)

(defmacro defconstant (name value &optional doc)
  "Make sure VALUE is evaluated only once \(to appease SBCL)."
  `(cl:defconstant ,name (if (boundp ',name) (symbol-value ',name) ,value)
     ,@(when doc (list doc))))
Hans Huebner's avatar
Hans Huebner committed

#+:lispworks
(eval-when (:compile-toplevel :load-toplevel :execute)
  (import 'lw:with-unique-names))
Hans Huebner's avatar
Hans Huebner committed
(defmacro with-unique-names ((&rest bindings) &body body)
  "Syntax: WITH-UNIQUE-NAMES ( { var | (var x) }* ) declaration* form*

Executes a series of forms with each VAR bound to a fresh,
uninterned symbol. The uninterned symbol is as if returned by a call
to GENSYM with the string denoted by X - or, if X is not supplied, the
string denoted by VAR - as argument.

The variable bindings created are lexical unless special declarations
are specified. The scopes of the name bindings and declarations do not
include the Xs.

The forms are evaluated in order, and the values of all but the last
are discarded \(that is, the body is an implicit PROGN)."
  ;; reference implementation posted to comp.lang.lisp as
  ;; <cy3bshuf30f.fsf@ljosa.com> by Vebjorn Ljosa - see also
  ;; <http://www.cliki.net/Common%20Lisp%20Utilities>
  `(let ,(mapcar #'(lambda (binding)
                     (check-type binding (or cons symbol))
                     (if (consp binding)
                       (destructuring-bind (var x) binding
                         (check-type var symbol)
                         `(,var (gensym ,(etypecase x
                                          (symbol (symbol-name x))
                                          (character (string x))
                                          (string x)))))
                       `(,binding (gensym ,(symbol-name binding)))))
                 bindings)
         ,@body))

#+:lispworks
(eval-when (:compile-toplevel :load-toplevel :execute)
  (setf (macro-function 'with-rebinding)
          (macro-function 'lw:rebinding)))

#-:lispworks
(defmacro with-rebinding (bindings &body body)
  "WITH-REBINDING ( { var | (var prefix) }* ) form*
Hans Huebner's avatar
Hans Huebner committed

Evaluates a series of forms in the lexical environment that is
formed by adding the binding of each VAR to a fresh, uninterned
symbol, and the binding of that fresh, uninterned symbol to VAR's
original value, i.e., its value in the current lexical environment.

The uninterned symbol is created as if by a call to GENSYM with the
string denoted by PREFIX - or, if PREFIX is not supplied, the string
denoted by VAR - as argument.

The forms are evaluated in order, and the values of all but the last
are discarded \(that is, the body is an implicit PROGN)."
  ;; reference implementation posted to comp.lang.lisp as
  ;; <cy3wv0fya0p.fsf@ljosa.com> by Vebjorn Ljosa - see also
  ;; <http://www.cliki.net/Common%20Lisp%20Utilities>
  (loop for binding in bindings
        for var = (if (consp binding) (car binding) binding)
        for name = (gensym)
        collect `(,name ,var) into renames
        collect ``(,,var ,,name) into temps
        finally (return `(let ,renames
                          (with-unique-names ,bindings
                            `(let (,,@temps)
                              ,,@body))))))

Edi Weitz's avatar
Edi Weitz committed
(declaim (inline digit-char-p))  
(defun digit-char-p (chr)
  (declare #.*standard-optimize-settings*)
  "Tests whether a character is a decimal digit, i.e. the same as
Perl's [\\d].  Note that this function shadows the standard Common
Lisp function CL:DIGIT-CHAR-P."
  (char<= #\0 chr #\9))
Hans Huebner's avatar
Hans Huebner committed

Edi Weitz's avatar
Edi Weitz committed
(declaim (inline word-char-p))  
(defun word-char-p (chr)
  (declare #.*standard-optimize-settings*)
Edi Weitz's avatar
Edi Weitz committed
  "Tests whether a character is a \"word\" character.  In the ASCII
charset this is equivalent to a-z, A-Z, 0-9, or _, i.e. the same as
Perl's [\\w]."
  (or (alphanumericp chr)
      (char= chr #\_)))

(defconstant +whitespace-char-string+
  (coerce '(#\Space #\Tab #\Linefeed #\Return #\Page) 'string)
  "A string of all characters which are considered to be whitespace.
Same as Perl's [\\s].")

(defun whitespacep (chr)
  (declare #.*special-optimize-settings*)
  "Tests whether a character is whitespace, i.e. whether it would
match [\\s] in Perl."
  (find chr +whitespace-char-string+ :test #'char=))
Hans Huebner's avatar
Hans Huebner committed

(defmacro maybe-coerce-to-simple-string (string)
Edi Weitz's avatar
Edi Weitz committed
  "Coerces STRING to a simple STRING unless it already is one."
Hans Huebner's avatar
Hans Huebner committed
  (with-unique-names (=string=)
    `(let ((,=string= ,string))
Edi Weitz's avatar
Edi Weitz committed
      (cond (#+:lispworks
             (lw:simple-text-string-p ,=string=)
             #-:lispworks
             (simple-string-p ,=string=)
Hans Huebner's avatar
Hans Huebner committed
              ,=string=)
            (t
Edi Weitz's avatar
Edi Weitz committed
             (coerce ,=string=
                     #+:lispworks 'lw:simple-text-string
                     #-:lispworks 'simple-string))))))
Hans Huebner's avatar
Hans Huebner committed

(declaim (inline nsubseq))
(defun nsubseq (sequence start &optional (end (length sequence)))
Edi Weitz's avatar
Edi Weitz committed
  "Returns a subsequence by pointing to location in original sequence."
Hans Huebner's avatar
Hans Huebner committed
  (make-array (- end start)
              :element-type (array-element-type sequence)
              :displaced-to sequence
              :displaced-index-offset start))

(defun normalize-var-list (var-list)
Edi Weitz's avatar
Edi Weitz committed
  "Utility function for REGISTER-GROUPS-BIND and DO-REGISTER-GROUPS.
Creates the long form \(a list of \(FUNCTION VAR) entries) out of the
short form of VAR-LIST."
  (loop for element in var-list
        if (consp element)
          nconc (loop for var in (rest element)
                      collect (list (first element) var))
        else
          collect (list '(function identity) element)))

(defun string-list-to-simple-string (string-list)
  "Concatenates a list of strings to one simple-string."
Edi Weitz's avatar
Edi Weitz committed
  (declare #.*standard-optimize-settings*)
  ;; this function provided by JP Massar; note that we can't use APPLY
  ;; with CONCATENATE here because of CALL-ARGUMENTS-LIMIT
  (let ((total-size 0))
Edi Weitz's avatar
Edi Weitz committed
    (declare (fixnum total-size))
    (dolist (string string-list)
Edi Weitz's avatar
Edi Weitz committed
      #-:genera (declare (string string))
      (incf total-size (length string)))
Edi Weitz's avatar
Edi Weitz committed
    (let ((result-string (make-sequence #-:lispworks 'simple-string
                                        #+:lispworks 'lw:simple-text-string
                                        total-size))
          (curr-pos 0))
Edi Weitz's avatar
Edi Weitz committed
      (declare (fixnum curr-pos))
      (dolist (string string-list)
Edi Weitz's avatar
Edi Weitz committed
        #-:genera (declare (string string))
        (replace result-string string :start1 curr-pos)
        (incf curr-pos (length string)))
      result-string)))
Edi Weitz's avatar
Edi Weitz committed

(defun complement* (test-function)
  "Like COMPLEMENT but optimized for unary functions."
  (declare #.*standard-optimize-settings*)
  (typecase test-function
    (function
     (lambda (char)
       (declare (character char))
       (not (funcall (the function test-function) char))))
    (otherwise
     (lambda (char)
       (declare (character char))
       (not (funcall test-function char))))))