/
/bzip2.lisp
  1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
  2 ;;; bzip2.lisp
  3 
  4 ;;; Copyright (c) 2008, Chaitanya Gupta.
  5 ;;; All rights reserved.
  6 ;;; 
  7 ;;; Redistribution and use in source and binary forms, with or without
  8 ;;; modification, are permitted provided that the following conditions
  9 ;;; are met:
 10 ;;; 1. Redistributions of source code must retain the above copyright
 11 ;;;    notice, this list of conditions and the following disclaimer.
 12 ;;; 2. Redistributions in binary form must reproduce the above copyright
 13 ;;;    notice, this list of conditions and the following disclaimer in the
 14 ;;;    documentation and/or other materials provided with the distribution.
 15 ;;; 3. The name of the author may not be used to endorse or promote products
 16 ;;;    derived from this software without specific prior written permission.
 17 ;;; 
 18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 19 ;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 20 ;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 21 ;;; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 22 ;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 23 ;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24 ;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25 ;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26 ;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 27 ;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 
 29 (defpackage #:cl-bzip2
 30   (:use #:cl #:cffi)
 31   (:nicknames #:bzip2)
 32   (:export "COMPRESS"
 33            "DECOMPRESS"
 34            "BZ-ERROR"))
 35 
 36 (in-package #:cl-bzip2)
 37 
 38 ;;; Define and load libbzip2
 39 
 40 (define-foreign-library libbz2
 41   (:darwin "libbz2.dylib")
 42   (:unix "libbz2.so")
 43   (t (:default "libbz2")))
 44 
 45 (with-simple-restart (skip "Skip loading foreign library LIBBZIP2.")
 46   (use-foreign-library libbz2))
 47 
 48 ;;; bz return codes
 49 
 50 (defvar *bz-return-codes* (make-hash-table))
 51 
 52 (defmacro define-bz-return-code (name code)
 53   (let ((var-name (format nil "+~A+" (substitute #\- #\_ (string name)))))
 54     `(progn
 55        (setf (gethash ,code *bz-return-codes*) ',name)
 56        (defconstant ,(intern var-name :bzip2) ,code))))
 57 
 58 (define-bz-return-code BZ_OK 0)
 59 (define-bz-return-code BZ_RUN_OK 1)
 60 (define-bz-return-code BZ_FLUSH_OK 2)
 61 (define-bz-return-code BZ_FINISH_OK 3)
 62 (define-bz-return-code BZ_STREAM_END 4)
 63 (define-bz-return-code BZ_SEQUENCE_ERROR -1)
 64 (define-bz-return-code BZ_PARAM_ERROR -2)
 65 (define-bz-return-code BZ_MEM_ERROR -3)
 66 (define-bz-return-code BZ_DATA_ERROR -4)
 67 (define-bz-return-code BZ_DATA_ERROR_MAGIC -5)
 68 (define-bz-return-code BZ_IO_ERROR -6)
 69 (define-bz-return-code BZ_UNEXPECTED_EOF -7)
 70 (define-bz-return-code BZ_OUTBUFF_FULL -8)
 71 (define-bz-return-code BZ_CONFIG_ERROR -9)
 72 
 73 ;;; bz_stream
 74 
 75 (defcstruct bz-stream
 76   (next-in :pointer)
 77   (avail-in :uint)
 78   (total-in-lo32 :uint)
 79   (total-in-hi32 :uint)
 80   
 81   (next-out :pointer)
 82   (avail-out :uint)
 83   (total-out-lo32 :uint)
 84   (total-out-hi32 :uint)
 85   
 86   (state :pointer)
 87   
 88   (bzalloc :pointer)
 89   (bzfree :pointer)
 90   (opaque :pointer))
 91 
 92 (defun bz-stream-slot-value (object slot-name)
 93   (foreign-slot-value object 'bz-stream slot-name))
 94 
 95 (defun (setf bz-stream-slot-value) (value object slot-name)
 96   (setf (foreign-slot-value object 'bz-stream slot-name) value))
 97 
 98 (defun make-bz-stream ()
 99   (let ((bz-stream (foreign-alloc 'bz-stream)))
100     (setf (bz-stream-slot-value bz-stream 'bzalloc) (null-pointer)
101           (bz-stream-slot-value bz-stream 'bzfree) (null-pointer)
102           (bz-stream-slot-value bz-stream 'opaque) (null-pointer))
103     bz-stream))
104 
105 (defun free-bz-stream (bz-stream)
106   (foreign-free bz-stream))
107 
108 (defmacro with-bz-stream ((var) &body body)
109   `(let ((,var (make-bz-stream)))
110      (unwind-protect (progn ,@body)
111        (free-bz-stream ,var))))
112 
113 ;;; Error handling
114 
115 (define-condition bz-error (error)
116   ((code :initarg :code
117          :reader bz-error-code
118          :initform (error "CODE is required."))
119    (cfun :initarg :cfun
120          :reader bz-error-cfun
121          :initform (error "CFUN is required.")))
122   (:report (lambda (c stream)
123              (format stream "Error in function ~A: ~A (~A)"
124                      (bz-error-cfun c)
125                      (gethash (bz-error-code c) *bz-return-codes*)
126                      (bz-error-code c))))
127   (:documentation "The default condition type for any BZIP2
128   compression/decompression related error."))
129 
130 (defun bz-error (code cfun)
131   (error 'bz-error
132          :code code
133          :cfun cfun))
134 
135 (defun ensure-list (item)
136   (etypecase item
137     (atom (list item))
138     (list item)))
139 
140 (defmacro with-bz-error-handling ((cfun &key accept deny) &body body)
141   (assert (not (and accept deny))
142           ()
143           "ACCEPT and DENY cannot both be non-NIL.")
144   (let ((return-code (gensym "RET-CODE")))
145     `(let ((,return-code (progn
146                            ,@body)))
147        ,(cond
148          (accept `(unless (member ,return-code (ensure-list ,accept))
149                     (bz-error ,return-code ,cfun)))
150          (deny `(when (member ,return-code (ensure-list ,deny))
151                   (bz-error ,return-code ,cfun))))
152        ,return-code)))
153 
154 ;;; Wrappers over low-level compress API
155 
156 (defcfun ("BZ2_bzCompressInit" %bz-compress-init) :int
157   (bz-stream :pointer)
158   (block-size-100k :int)
159   (verbosity :int)
160   (work-factor :int))
161 
162 (defun bz-compress-init (bz-stream block-size-100k verbosity work-factor)
163   (with-bz-error-handling ("BZ2_bzCompressInit" :accept +bz-ok+)
164     (%bz-compress-init bz-stream block-size-100k verbosity work-factor)))
165 
166 ;;;; Actions for BZ2_bzCompress
167 
168 (defconstant +bz-run+ 0)
169 (defconstant +bz-flush+ 1)
170 (defconstant +bz-finish+ 2)
171 
172 (defcfun ("BZ2_bzCompress" %bz-compress) :int
173   (bz-stream :pointer)
174   (action :int))
175 
176 (defun bz-compress (bz-stream action)
177   (flet ((!accept (code-designator)
178            (with-bz-error-handling ("BZ2_bzCompress" :accept code-designator)
179              (%bz-compress bz-stream action))))
180     (cond 
181       ((= action +bz-run+)
182        (!accept +bz-run-ok+))
183       ((= action +bz-flush+)
184        (!accept +bz-flush-ok+))
185       ((= action +bz-finish+)
186        (!accept (list +bz-finish-ok+ +bz-stream-end+))))))
187 
188 (defcfun ("BZ2_bzCompressEnd" %bz-compress-end) :int
189   (bz-stream :pointer))
190 
191 (defun bz-compress-end (bz-stream)
192   (with-bz-error-handling ("BZ2_bzCompressEnd" :accept +bz-ok+)
193     (%bz-compress-end bz-stream)))
194 
195 ;;; Wrappers over low-level decompress API
196 
197 (defcfun ("BZ2_bzDecompressInit" %bz-decompress-init) :int
198   (bz-stream :pointer)
199   (verbosity :int)
200   (small :int))
201 
202 (defun bz-decompress-init (bz-stream verbosity smallp)
203   (with-bz-error-handling ("BZ2_bzDecompressInit" :accept +bz-ok+)
204     (%bz-decompress-init bz-stream
205                          verbosity
206                          (if smallp 1 0))))
207 
208 (defcfun ("BZ2_bzDecompress" %bz-decompress) :int
209   (bz-stream :pointer))
210 
211 (defun bz-decompress (bz-stream)
212   (with-bz-error-handling ("BZ2_bzDecompress" :accept (list +bz-ok+ +bz-stream-end+))
213     (%bz-decompress bz-stream)))
214 
215 (defcfun ("BZ2_bzDecompressEnd" %bz-decompress-end) :int
216   (bz-strem :pointer))
217 
218 (defun bz-decompress-end (bz-stream)
219   (with-bz-error-handling ("BZ2_bzDecompressEnd" :accept +bz-ok+)
220     (%bz-decompress-end bz-stream)))
221 
222 ;;;; Easy types
223 
224 (deftype octet ()
225   '(unsigned-byte 8))
226 
227 (deftype octet-vector ()
228   '(vector (unsigned-byte 8)))
229 
230 ;;; Block read/write from C memory
231 
232 (defun mem-read-stream (stream ptr length)
233   (loop
234      for i from 0 to (1- length)
235      do (write-byte (mem-aref ptr :uchar i) stream)))
236 
237 (defun mem-write-vector (vector ptr &optional (count (length vector)))
238   (loop
239      for i below count
240      do (setf (mem-aref ptr :uchar i) (aref vector i))))
241 
242 ;;; High level stuff
243 
244 (defparameter *input-chunk-size* 4096)
245 (defparameter *output-chunk-size* 4096)
246 
247 (defstruct (array-container
248              (:constructor make-array-container-aux)
249              (:conc-name nil))
250   in-vec
251   in-cvec
252   out-cvec
253   input-chunk-size
254   output-chunk-size)
255 
256 (defun make-array-container (input-chunk-size output-chunk-size)
257   (make-array-container-aux
258    :in-vec (make-sequence 'octet-vector input-chunk-size)
259    :in-cvec (foreign-alloc :uchar :count input-chunk-size)
260    :out-cvec (foreign-alloc :uchar :count output-chunk-size)
261    :input-chunk-size input-chunk-size
262    :output-chunk-size output-chunk-size))
263 
264 (defun free-array-container (container)
265   (foreign-free (in-cvec container))
266   (foreign-free (out-cvec container)))
267 
268 (defmacro with-array-container ((var &key
269                                      (input-chunk-size '*input-chunk-size*)
270                                      (output-chunk-size '*output-chunk-size*))
271                                 &body body)
272   `(let ((,var (make-array-container ,input-chunk-size ,output-chunk-size)))
273      (unwind-protect (progn ,@body)
274        (free-array-container ,var))))
275 
276 ;;;; High level compression calls
277 
278 (defun compress-stream-aux (bz-stream in-stream out-stream ac)
279   (let* ((input-chunk-size (input-chunk-size ac))
280          (output-chunk-size (output-chunk-size ac))
281          (in-vec (in-vec ac))
282          (in-size (read-sequence in-vec in-stream :start 0 :end input-chunk-size))
283          (in-cvec (in-cvec ac))
284          (out-cvec (out-cvec ac))
285          (input-left-p (= in-size input-chunk-size)))
286     (mem-write-vector in-vec in-cvec in-size)
287     (with-foreign-slots ((next-in avail-in next-out avail-out) bz-stream bz-stream)
288       (setf next-in in-cvec
289             avail-in in-size)
290       ;; The following code is a translation from the C source of
291       ;; BZ2_bzWrite and BZ2_bzWriteClose64 in bzlib.c.
292       ;; Using action BZ_RUN
293       (block run
294         (loop
295            (setf next-out out-cvec
296                  avail-out output-chunk-size)
297            ;; bzCompress/BZ_RUN
298            (bz-compress bz-stream +bz-run+)
299            (when (< avail-out output-chunk-size)
300              ;; Writing to output stream
301              (mem-read-stream out-stream out-cvec (- output-chunk-size avail-out)))
302            (when (= avail-in 0)
303              (return-from run nil))))
304       ;; Using action BZ_FINISH, only if no more input is left
305       (unless input-left-p
306         (block finish
307           (loop
308              (setf next-out out-cvec
309                    avail-out output-chunk-size)
310              (let ((return-code
311                     ;; bzCompress/BZ_FINISH
312                     (bz-compress bz-stream +bz-finish+)))
313                (when (< avail-out output-chunk-size)
314                  ;; Writing to output stream
315                  (mem-read-stream out-stream out-cvec (- output-chunk-size avail-out)))
316                (when (= return-code +bz-stream-end+)
317                  (return-from finish nil)))))))
318     (force-output out-stream)
319     ;; More input available?
320     input-left-p))
321 
322 (defparameter *block-size-100k* 9)
323 (defparameter *verbosity* 0)
324 (defparameter *work-factor* 30)
325 
326 (defun compress-stream (in-stream out-stream &key
327                         (block-size-100k *block-size-100k*)
328                         (verbosity *verbosity*)
329                         (work-factor *work-factor*))
330   (with-bz-stream (bz-stream)
331     (bz-compress-init bz-stream block-size-100k verbosity work-factor)
332     (unwind-protect
333          (with-array-container (ac)
334            (loop
335               while (compress-stream-aux bz-stream in-stream out-stream ac)))
336       (bz-compress-end bz-stream)))
337   (values))
338 
339 (defun compress (in out &key
340                  (block-size-100k *block-size-100k*)
341                  (verbosity *verbosity*)
342                  (work-factor *work-factor*))
343   "Compresses data from IN to OUT. IN or OUT can either be a
344 binary stream or a pathname. This function doesn't return any
345 value.
346 
347 BLOCK-SIZE-100K (default 9), VERBOSITY (default 0) and
348 WORK-FACTOR (default 30) correspond to the parameters
349 `blockSize100k', `verbosity' and `workFactor', respectively, for
350 the libbzip2 function `BZ2_bzCompressInit'.
351 
352 From the bzip2 manual:
353 
354 Parameter `blockSize100k' specifies the block size to be used for
355 compression. It should be a value between 1 and 9 inclusive, and
356 the actual block size used is 100000 x this figure. 9 gives the
357 best compression but takes most memory.
358 
359 Parameter `verbosity' should be set to a number between 0 and 4
360 inclusive. 0 is silent, and greater numbers give increasingly
361 verbose monitoring/debugging output.
362 
363 Parameter `workFactor' controls how the compression phase behaves
364 when presented with worst case, highly repetitive, input data. If
365 compression runs into difficulties caused by repetitive data, the
366 library switches from the standard sorting algorithm to a
367 fallback algorithm. The fallback is slower than the standard
368 algorithm by perhaps a factor of three, but always behaves
369 reasonably, no matter how bad the input."
370   (let ((*block-size-100k* block-size-100k)
371         (*verbosity* verbosity)
372         (*work-factor* work-factor))
373     (compress-aux in out)))
374 
375 (defgeneric compress-aux (in out)
376   (:documentation "Specialize behaviour for COMPRESS depending on IN and OUT."))
377 
378 (defmethod compress-aux ((in stream) (out stream))
379   (compress-stream in out))
380 
381 (defmethod compress-aux ((in pathname) out)
382   (with-open-file (in-stream in :direction :input :element-type 'octet)
383     (compress-aux in-stream out)))
384 
385 (defmethod compress-aux (in (out pathname))
386   (with-open-file (out-stream out :direction :output :element-type 'octet :if-exists :supersede)
387     (compress-aux in out-stream)))
388 
389 ;;;; High level decompression calls
390 
391 (defun decompress-stream-aux (bz-stream in-stream out-stream ac)
392   (let* ((input-chunk-size (input-chunk-size ac))
393          (output-chunk-size (output-chunk-size ac))
394          (in-vec (in-vec ac))
395          (in-size (read-sequence in-vec in-stream :start 0 :end input-chunk-size))
396          (in-cvec (in-cvec ac))
397          (out-cvec (out-cvec ac))
398          (input-left-p (= in-size input-chunk-size)))
399     (mem-write-vector in-vec in-cvec in-size)
400     (with-foreign-slots ((next-in avail-in next-out avail-out) bz-stream bz-stream)
401       (setf next-in in-cvec
402             avail-in in-size)
403       ;; bz_decompress
404       (block decompress
405         (loop
406            (setf next-out out-cvec
407                  avail-out output-chunk-size)
408            (let ((return-code (bz-decompress bz-stream)))
409              (when (< avail-out output-chunk-size)
410                (mem-read-stream out-stream out-cvec (- output-chunk-size avail-out)))
411              (when (or (= avail-in 0)
412                        (= return-code +bz-stream-end+))
413                (return-from decompress nil)))))
414       ;; More input available?
415       input-left-p)))
416 
417 (defparameter *smallp* nil)
418 
419 (defun decompress-stream (in-stream out-stream &key
420                           (verbosity *verbosity*)
421                           (smallp *smallp*))
422   (with-bz-stream (bz-stream)
423     (bz-decompress-init bz-stream verbosity smallp)
424     (unwind-protect
425          (with-array-container (ac)
426            (loop
427               while (decompress-stream-aux bz-stream in-stream out-stream ac)))
428       (bz-decompress-end bz-stream)))
429   (values))
430 
431 (defun decompress (in out &key
432                    (verbosity *verbosity*)
433                    (smallp *smallp*))
434   "Decompresses data from IN to OUT. IN or OUT can either be a
435 binary stream or a pathname. This function doesn't return any
436 value.
437 
438 VERBOSITY and SMALLP (default NIL) correspond to the parameters
439 `verbosity' and `small', respectively, for the libbzip2 function
440 `BZ2_bzDecompressInit'.
441 
442 For the meaning of VERBOSITY, see the documentation for
443 COMPRESS. A non-NIL value for SMALLP corresponds to a non-zero
444 value for the parameter `small'. Here's what the bzip2 manual
445 says about `small':
446 
447 If `small' is nonzero, the library will use an alternative
448 decompression algorithm which uses less memory but at the cost of
449 decompressing more slowly (roughly speaking, half the speed, but
450 the maximum memory requirement drops to around 2300k)."
451   (let ((*verbosity* verbosity)
452         (*smallp* smallp))
453     (decompress-aux in out)))
454 
455 (defgeneric decompress-aux (in out)
456   (:documentation "Specialize behaviour for DECOMPRESS depending on IN and OUT."))
457 
458 (defmethod decompress-aux ((in stream) (out stream))
459   (decompress-stream in out))
460 
461 (defmethod decompress-aux ((in pathname) out)
462   (with-open-file (in-stream in :direction :input :element-type 'octet)
463     (decompress-aux in-stream out)))
464 
465 (defmethod decompress-aux (in (out pathname))
466   (with-open-file (out-stream out :direction :output :element-type 'octet :if-exists :supersede)
467     (decompress-aux in out-stream)))