Change %primitive print.to output strings in utf8 instead of utf16.
[projects/cmucl/cmucl.git] / src / lisp / interr.c
CommitLineData
62957726 1/*
68ac9a3e 2 * $Header: /Volumes/share2/src/cmucl/cvs2git/cvsroot/src/lisp/interr.c,v 1.10 2009/06/11 16:04:01 rtoy Rel $
62957726 3 *
4 * Stuff to handle internal errors.
5 *
6 */
7
8#include <stdio.h>
9#include <stdarg.h>
3cefc21b 10#include <stdlib.h>
62957726 11
3861a7bf 12#include "arch.h"
62957726 13#include "signal.h"
14
15#include "lisp.h"
16#include "internals.h"
17#include "interr.h"
18#include "print.h"
19#include "lispregs.h"
62957726 20\f
9a8c1c2f 21
62957726 22/* Lossage handler. */
23
9a8c1c2f 24static void
25default_lossage_handler(void)
62957726 26{
27 exit(1);
28}
29
9a8c1c2f 30static void (*lossage_handler) (void) = default_lossage_handler;
62957726 31
9a8c1c2f 32void
33set_lossage_handler(void handler(void))
62957726 34{
35 lossage_handler = handler;
36}
37
9a8c1c2f 38void
39lose(char *fmt, ...)
62957726 40{
41 va_list ap;
42
43 if (fmt != NULL) {
44 va_start(ap, fmt);
45 vfprintf(stderr, fmt, ap);
46 fflush(stderr);
47 va_end(ap);
48 }
49 lossage_handler();
50}
62957726 51\f
9a8c1c2f 52
62957726 53/* Internal error handler for when the Lisp error system doesn't exist. */
54
55static char *errors[] = ERRORS;
56
9a8c1c2f 57void
58internal_error(os_context_t * context)
62957726 59{
60 unsigned char *ptr = arch_internal_error_arguments(context);
61 int len, scoffset, sc, offset, ch;
62
63 len = *ptr++;
64 printf("Error: %s\n", errors[*ptr++]);
65 len--;
66 while (len > 0) {
67 scoffset = *ptr++;
68 len--;
69 if (scoffset == 253) {
70 scoffset = *ptr++;
71 len--;
9a8c1c2f 72 } else if (scoffset == 254) {
73 scoffset = ptr[0] + ptr[1] * 256;
62957726 74 ptr += 2;
75 len -= 2;
9a8c1c2f 76 } else if (scoffset == 255) {
77 scoffset = ptr[0] + (ptr[1] << 8) + (ptr[2] << 16) + (ptr[3] << 24);
62957726 78 ptr += 4;
79 len -= 4;
80 }
81 sc = scoffset & 0x1f;
82 offset = scoffset >> 5;
9a8c1c2f 83
62957726 84 printf(" SC: %d, Offset: %d", sc, offset);
85 switch (sc) {
86 case sc_AnyReg:
87 case sc_DescriptorReg:
9a8c1c2f 88 putchar('\t');
89 brief_print(SC_REG(context, offset));
90 break;
62957726 91
92 case sc_BaseCharReg:
9a8c1c2f 93 ch = SC_REG(context, offset);
62957726 94#ifdef i386
9a8c1c2f 95 if (offset & 1)
96 ch = ch >> 8;
97 ch = ch & 0xff;
62957726 98#endif
9a8c1c2f 99 switch (ch) {
100 case '\n':
101 printf("\t'\\n'\n");
102 break;
103 case '\b':
104 printf("\t'\\b'\n");
105 break;
106 case '\t':
107 printf("\t'\\t'\n");
108 break;
109 case '\r':
110 printf("\t'\\r'\n");
111 break;
112 default:
113 if (ch < 32 || ch > 127)
114 printf("\\%03o", ch);
115 else
116 printf("\t'%c'\n", ch);
117 break;
118 }
119 break;
62957726 120 case sc_SapReg:
121#ifdef sc_WordPointerReg
122 case sc_WordPointerReg:
123#endif
44e0351e 124 printf("\t0x%08lx\n", SC_REG(context, offset));
9a8c1c2f 125 break;
62957726 126 case sc_SignedReg:
44e0351e 127 printf("\t%ld\n", SC_REG(context, offset));
9a8c1c2f 128 break;
62957726 129 case sc_UnsignedReg:
44e0351e 130 printf("\t%lu\n", SC_REG(context, offset));
9a8c1c2f 131 break;
132#if 0 /* broken */
9aa51d29 133#ifdef sc_SingleReg
134 case sc_SingleReg:
9a8c1c2f 135 printf("\t%g\n", *(float *) &context->sc_fpregs[offset]);
136 break;
62957726 137#endif
9aa51d29 138#ifdef sc_DoubleReg
139 case sc_DoubleReg:
9a8c1c2f 140 printf("\t%g\n", *(double *) &context->sc_fpregs[offset]);
141 break;
62957726 142#endif
9aa51d29 143#ifdef sc_LongReg
144 case sc_LongReg:
9a8c1c2f 145 printf("\t%Lg\n", *(long double *) &context->sc_fpregs[offset]);
146 break;
9aa51d29 147#endif
148#endif
62957726 149 default:
9a8c1c2f 150 printf("\t???\n");
151 break;
62957726 152 }
153 }
154
155 lose(NULL);
156}
9a8c1c2f 157\f
62957726 158
159
160
62957726 161/* Utility routines used by random pieces of code. */
162
88d77e83
RT
163#if defined(UNICODE)
164/*
165 * Convert a unicode code point to a set of utf8-encoded octets to
166 * standard output. This is the algorithm used by the Lisp utf8
167 * encoder in src/code/extfmts.lisp.
168 */
169static void
170utf8(int code, int len)
171{
172 int k;
173 int j = 6 - len;
174 int p = 6 * len;
175 int init = 0xff & (0x7e << j);
176 int c;
177
178 /*
179 * (ldb (byte j p) code): Extract j bits from position p of the code
180 */
181 c = (code >> p) & ((1 << j) - 1);
182
183 putchar(init | c);
184
185 for (k = 0; k < len; ++k) {
186 p -= 6;
187 /* (ldb (byte 6 p) code) */
188 c = (code >> p) & ((1 << 6) - 1);
189 putchar(128 | c);
190 }
191}
192
193/*
194 * Test if code is a surrogate. Returns true if so. If the code is a
195 * surrogate, then type indicates if it is a high (0) or low (1)
196 * surrogate. If not a surrogate, type is not modified. If type is
197 * NULL, then no type is returned.
198 */
199boolean
200surrogatep(int code, int *type)
201{
202 boolean result;
203
204 if ((code >> 11) == 0x1b) {
205 result = 1;
206 if (type) {
207 *type = (code >> 10) & 1;
208 }
209 } else {
210 result = 0;
211 }
212
213 return result;
214}
215
216/*
217 * Convert one or two utf16 code units into a code point. utf16
218 * points to the string, len is the length of the string. The
219 * codepoint is returned and the number of code units consumed is
220 * returned in consumed.
221 */
222int
223utf16_codepoint(unsigned short int* utf16, int len, int* consumed)
224{
225 int code = *utf16;
226 int read = 1;
227
228 int code_type;
229
230 /*
231 * If the current code unit is not a surrogate, we're done.
232 * Otherwise process the surrogate
233 */
234
235 if (surrogatep(code, &code_type)) {
236 /*
237 * Try to get the following surrogate, if there are still code
238 * units left. If not, we have a bare surrogate, so just
239 * return the replacement character.
240 */
241 if (len > 0) {
242 int next = utf16[1];
243 int next_type;
244 if (surrogatep(next, &next_type)) {
245 /* Got the following surrogate, so combine them if possible */
246 if ((code_type == 0) && (next_type == 1)) {
247 /* High followed by low surrogate */
248 code = ((code - 0xd800) << 10) + next + 0x2400;
249 ++read;
250 } else if ((code_type == 1) && (next_type == 0)) {
251 /* Low followed by high surrogate */
252 code = ((code - 0xd800) << 10) + next + 0x2400;;
253 ++read;
254 } else {
255 /* Give up */
256 code = 0xfffd;
257 }
258 } else {
259 /* Surrogate followed by non-surrogate. Give up */
260 code = 0xfffd;
261 }
262 } else {
263 code = 0xfffd;
264 }
265 }
266
267 *consumed = read;
268 return code;
269}
270
271/*
272 * Send the utf-16 Lisp unicode string to standard output as a
273 * utf8-encoded sequence of octets.
274 */
275static void
276utf16_output(unsigned short int* utf16, int len)
277{
278 while (len) {
279 int consumed;
280 int code = utf16_codepoint(utf16, len, &consumed);
281
282 /* Output the codepoint */
283 if (code < 0x80) {
284 putchar(code);
285 } else if (code < 0x800) {
286 utf8(code, 1);
287 } else if (code < 0x10000) {
288 utf8(code, 2);
289 } else if (code < 0x110000) {
290 utf8(code, 3);
291 } else {
292 /*
293 * This shouldn't happen, but if it does we don't want to
294 * signal any kind of error so just output a question mark
295 * so we can continue.
296 */
297 putchar('?');
298 }
299
300 len -= consumed;
301 utf16 += consumed;
302 }
303}
304#endif
305
306/*
307 * debug_print is used by %primitive print to output a string.
308 */
9a8c1c2f 309lispobj
68ac9a3e 310debug_print(lispobj object)
62957726 311{
68ac9a3e 312
313#ifndef UNICODE
314 printf("%s\n", (char *) (((struct vector *) PTR(object))->data));
f7f9348b 315 fflush(stdout);
68ac9a3e 316#else
317 if (Pointerp(object)) {
318 struct vector *lisp_string = (struct vector*) PTR(object);
319
320 if ((unsigned long) lisp_string->header == type_SimpleString) {
321 unsigned short int* lisp_chars;
322 int len;
68ac9a3e 323
324 len = lisp_string->length >> 2;
325 lisp_chars = (unsigned short int*) lisp_string->data;
2997c7c8 326
88d77e83 327 utf16_output(lisp_chars, len);
68ac9a3e 328 putchar('\n');
329
330 fflush(stdout);
331 } else {
332 print(object);
333 }
334 } else {
88d77e83
RT
335 /*
336 * We should actually ever get here because %primitive print
337 * is only supposed to take strings. But if we do, it's
338 * useful to print something out anyway.
339 */
68ac9a3e 340#if 1
341 printf("obj @0x%lx: ", (unsigned long) object);
342#endif
343 print(object);
344 }
345#endif
62957726 346 return NIL;
347}
68ac9a3e 348