Clean up debug_print. Surrogate pairs are always high surrogate
[projects/cmucl/cmucl.git] / src / lisp / interr.c
CommitLineData
62957726 1/*
68ac9a3e 2 * $Header: /Volumes/share2/src/cmucl/cvs2git/cvsroot/src/lisp/interr.c,v 1.10 2009/06/11 16:04:01 rtoy Rel $
62957726 3 *
4 * Stuff to handle internal errors.
5 *
6 */
7
8#include <stdio.h>
9#include <stdarg.h>
3cefc21b 10#include <stdlib.h>
62957726 11
3861a7bf 12#include "arch.h"
62957726 13#include "signal.h"
14
15#include "lisp.h"
16#include "internals.h"
17#include "interr.h"
18#include "print.h"
19#include "lispregs.h"
62957726 20\f
9a8c1c2f 21
62957726 22/* Lossage handler. */
23
9a8c1c2f 24static void
25default_lossage_handler(void)
62957726 26{
27 exit(1);
28}
29
9a8c1c2f 30static void (*lossage_handler) (void) = default_lossage_handler;
62957726 31
9a8c1c2f 32void
33set_lossage_handler(void handler(void))
62957726 34{
35 lossage_handler = handler;
36}
37
9a8c1c2f 38void
39lose(char *fmt, ...)
62957726 40{
41 va_list ap;
42
43 if (fmt != NULL) {
44 va_start(ap, fmt);
45 vfprintf(stderr, fmt, ap);
46 fflush(stderr);
47 va_end(ap);
48 }
49 lossage_handler();
50}
62957726 51\f
9a8c1c2f 52
62957726 53/* Internal error handler for when the Lisp error system doesn't exist. */
54
55static char *errors[] = ERRORS;
56
9a8c1c2f 57void
58internal_error(os_context_t * context)
62957726 59{
60 unsigned char *ptr = arch_internal_error_arguments(context);
61 int len, scoffset, sc, offset, ch;
62
63 len = *ptr++;
64 printf("Error: %s\n", errors[*ptr++]);
65 len--;
66 while (len > 0) {
67 scoffset = *ptr++;
68 len--;
69 if (scoffset == 253) {
70 scoffset = *ptr++;
71 len--;
9a8c1c2f 72 } else if (scoffset == 254) {
73 scoffset = ptr[0] + ptr[1] * 256;
62957726 74 ptr += 2;
75 len -= 2;
9a8c1c2f 76 } else if (scoffset == 255) {
77 scoffset = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16) + (ptr[3] << 24);
62957726 78 ptr += 4;
79 len -= 4;
80 }
81 sc = scoffset & 0x1f;
82 offset = scoffset >> 5;
9a8c1c2f 83
62957726 84 printf(" SC: %d, Offset: %d", sc, offset);
85 switch (sc) {
86 case sc_AnyReg:
87 case sc_DescriptorReg:
9a8c1c2f 88 putchar('\t');
89 brief_print(SC_REG(context, offset));
90 break;
62957726 91
92 case sc_BaseCharReg:
9a8c1c2f 93 ch = SC_REG(context, offset);
62957726 94#ifdef i386
9a8c1c2f 95 if (offset & 1)
96 ch = ch >> 8;
97 ch = ch & 0xff;
62957726 98#endif
9a8c1c2f 99 switch (ch) {
100 case '\n':
101 printf("\t'\\n'\n");
102 break;
103 case '\b':
104 printf("\t'\\b'\n");
105 break;
106 case '\t':
107 printf("\t'\\t'\n");
108 break;
109 case '\r':
110 printf("\t'\\r'\n");
111 break;
112 default:
113 if (ch < 32 || ch > 127)
114 printf("\\%03o", ch);
115 else
116 printf("\t'%c'\n", ch);
117 break;
118 }
119 break;
62957726 120 case sc_SapReg:
121#ifdef sc_WordPointerReg
122 case sc_WordPointerReg:
123#endif
44e0351e 124 printf("\t0x%08lx\n", SC_REG(context, offset));
9a8c1c2f 125 break;
62957726 126 case sc_SignedReg:
44e0351e 127 printf("\t%ld\n", SC_REG(context, offset));
9a8c1c2f 128 break;
62957726 129 case sc_UnsignedReg:
44e0351e 130 printf("\t%lu\n", SC_REG(context, offset));
9a8c1c2f 131 break;
132#if 0 /* broken */
9aa51d29 133#ifdef sc_SingleReg
134 case sc_SingleReg:
9a8c1c2f 135 printf("\t%g\n", *(float *) &context->sc_fpregs[offset]);
136 break;
62957726 137#endif
9aa51d29 138#ifdef sc_DoubleReg
139 case sc_DoubleReg:
9a8c1c2f 140 printf("\t%g\n", *(double *) &context->sc_fpregs[offset]);
141 break;
62957726 142#endif
9aa51d29 143#ifdef sc_LongReg
144 case sc_LongReg:
9a8c1c2f 145 printf("\t%Lg\n", *(long double *) &context->sc_fpregs[offset]);
146 break;
9aa51d29 147#endif
148#endif
62957726 149 default:
9a8c1c2f 150 printf("\t???\n");
151 break;
62957726 152 }
153 }
154
155 lose(NULL);
156}
9a8c1c2f 157\f
62957726 158
159
160
62957726 161/* Utility routines used by random pieces of code. */
162
88d77e83 163#if defined(UNICODE)
0dae4884
RT
164
165/* The Unicode replacement character code */
166#define REPLACEMENT_CODE 0xfffd
167
88d77e83
RT
168/*
169 * Convert a unicode code point to a set of utf8-encoded octets to
170 * standard output. This is the algorithm used by the Lisp utf8
171 * encoder in src/code/extfmts.lisp.
172 */
173static void
174utf8(int code, int len)
175{
176 int k;
177 int j = 6 - len;
178 int p = 6 * len;
179 int init = 0xff & (0x7e << j);
180 int c;
181
182 /*
183 * (ldb (byte j p) code): Extract j bits from position p of the code
184 */
185 c = (code >> p) & ((1 << j) - 1);
186
187 putchar(init | c);
188
189 for (k = 0; k < len; ++k) {
190 p -= 6;
191 /* (ldb (byte 6 p) code) */
192 c = (code >> p) & ((1 << 6) - 1);
193 putchar(128 | c);
194 }
195}
196
197/*
198 * Test if code is a surrogate. Returns true if so. If the code is a
199 * surrogate, then type indicates if it is a high (0) or low (1)
200 * surrogate. If not a surrogate, type is not modified. If type is
201 * NULL, then no type is returned.
202 */
0dae4884 203static boolean
88d77e83
RT
204surrogatep(int code, int *type)
205{
206 boolean result;
207
208 if ((code >> 11) == 0x1b) {
209 result = 1;
210 if (type) {
211 *type = (code >> 10) & 1;
212 }
213 } else {
214 result = 0;
215 }
216
217 return result;
218}
219
220/*
221 * Convert one or two utf16 code units into a code point. utf16
222 * points to the string, len is the length of the string. The
223 * codepoint is returned and the number of code units consumed is
224 * returned in consumed.
225 */
0dae4884 226static int
88d77e83
RT
227utf16_codepoint(unsigned short int* utf16, int len, int* consumed)
228{
ff569406
RT
229 int codepoint = REPLACEMENT_CODE;
230 int code_unit = *utf16;
88d77e83 231 int code_type;
ff569406 232 int read = 1;
88d77e83
RT
233
234 /*
235 * If the current code unit is not a surrogate, we're done.
ff569406
RT
236 * Otherwise process the surrogate. If this is a high (leading)
237 * surrogate and the next code unit is a low (trailing) surrogate,
238 * compute the code point. Otherwise we have a bare surrogate or
239 * an invalid surrogate sequence, so just return the replacement
240 * character.
88d77e83
RT
241 */
242
ff569406
RT
243 if (surrogatep(code_unit, &code_type)) {
244 if (code_type == 0 && len > 0) {
245 int next_unit = utf16[1];
88d77e83 246 int next_type;
ff569406
RT
247 if (surrogatep(next_unit, &next_type)) {
248 if (next_type == 1) {
88d77e83 249 /* High followed by low surrogate */
ff569406 250 codepoint = ((code_unit - 0xd800) << 10) + next_unit + 0x2400;
88d77e83 251 ++read;
88d77e83 252 }
88d77e83 253 }
88d77e83 254 }
ff569406
RT
255 } else {
256 codepoint = code_unit;
88d77e83
RT
257 }
258
259 *consumed = read;
ff569406 260 return codepoint;
88d77e83
RT
261}
262
263/*
264 * Send the utf-16 Lisp unicode string to standard output as a
265 * utf8-encoded sequence of octets.
266 */
267static void
268utf16_output(unsigned short int* utf16, int len)
269{
270 while (len) {
271 int consumed;
272 int code = utf16_codepoint(utf16, len, &consumed);
273
274 /* Output the codepoint */
275 if (code < 0x80) {
276 putchar(code);
277 } else if (code < 0x800) {
278 utf8(code, 1);
279 } else if (code < 0x10000) {
280 utf8(code, 2);
281 } else if (code < 0x110000) {
282 utf8(code, 3);
283 } else {
284 /*
285 * This shouldn't happen, but if it does we don't want to
286 * signal any kind of error so just output a question mark
287 * so we can continue.
288 */
289 putchar('?');
290 }
291
292 len -= consumed;
293 utf16 += consumed;
294 }
295}
296#endif
297
298/*
299 * debug_print is used by %primitive print to output a string.
300 */
9a8c1c2f 301lispobj
68ac9a3e 302debug_print(lispobj object)
62957726 303{
68ac9a3e 304
305#ifndef UNICODE
306 printf("%s\n", (char *) (((struct vector *) PTR(object))->data));
f7f9348b 307 fflush(stdout);
68ac9a3e 308#else
309 if (Pointerp(object)) {
310 struct vector *lisp_string = (struct vector*) PTR(object);
311
312 if ((unsigned long) lisp_string->header == type_SimpleString) {
313 unsigned short int* lisp_chars;
314 int len;
68ac9a3e 315
316 len = lisp_string->length >> 2;
317 lisp_chars = (unsigned short int*) lisp_string->data;
2997c7c8 318
88d77e83 319 utf16_output(lisp_chars, len);
68ac9a3e 320 putchar('\n');
321
322 fflush(stdout);
323 } else {
324 print(object);
325 }
326 } else {
88d77e83 327 /*
ff569406
RT
328 * We shouldn't actually ever get here because %primitive
329 * print is only supposed to take strings. But if we do, it's
88d77e83
RT
330 * useful to print something out anyway.
331 */
68ac9a3e 332#if 1
333 printf("obj @0x%lx: ", (unsigned long) object);
334#endif
335 print(object);
336 }
337#endif
62957726 338 return NIL;
339}
68ac9a3e 340