4 * From osf1-os.c,v 1.1 94/03/27 15:30:51 hallgren Exp $
6 * OS-dependent routines. This file (along with os.h) exports an
7 * OS-independent interface to the operating system VM facilities.
8 * Surprisingly, this interface looks a lot like the Mach interface
9 * (but simpler in some places). For some operating systems, a subset
10 * of these functions will have to be emulated.
12 * This is the OSF1 version. By Sean Hallgren.
13 * Much hacked by Paul Werkowski
14 * Morphed from the FreeBSD file by Peter Van Eynde (July 1996)
15 * GENCGC support by Douglas Crosher, 1996, 1997.
16 * Alpha support by Julian Dolby, 1999.
21 #include <sys/param.h>
27 #include "interrupt.h"
29 #include "internals.h"
30 #include <sys/socket.h>
31 #include <sys/utsname.h>
33 #include <sys/types.h>
35 /* #include <sys/sysinfo.h> */
39 #include <sys/resource.h>
/* System page size; assigned from getpagesize() in os_init(). */
46 size_t os_vm_page_size;
/*
 * Declarations used by check_personality(); guarded so that they are
 * only seen on x86 and x86-64 builds.
 */
53 #if defined(__i386) || defined(__x86_64)
54 /* Prototype for personality(2). Done inline here since the header file
55 * for this isn't available on old versions of glibc. */
56 int personality (unsigned long);
/* Supply the flag value ourselves when the system headers do not define it. */
58 #if !defined(ADDR_NO_RANDOMIZE)
59 #define ADDR_NO_RANDOMIZE 0x40000
/* Argument handed to personality() when only reading the current value. */
61 /* From personality(2) */
62 #define CURRENT_PERSONALITY 0xffffffffUL
/*
 * check_personality: on x86 / x86-64 Linux, try to make sure address
 * space randomization is disabled for the running process.
 *
 *   name       - uname() data.  NOTE(review): presumably the release
 *                string in it is what p walks over for the atoi() calls;
 *                the setup of p is not visible in this listing.
 *   argv, envp - handed unchanged to execve() when re-executing.
 *
 * The personality is only touched for kernels 2.4.x with x > 20,
 * 2.6.x with x >= 11, 2.y with y > 6, and any major version >= 3.
 * When ADDR_NO_RANDOMIZE is not yet set it is requested, the personality
 * is read back, and only if the request did not fail and the value really
 * changed is the runtime re-executed through the target of
 * /proc/self/exe.  Otherwise a warning is written to stderr and startup
 * continues.
 *
 * NOTE(review): readlink() does not NUL-terminate its result; confirm
 * that the lines between the readlink() and execve() calls (not visible
 * here) check the return value and terminate the buffer.
 *
 * The two halves of the warning text are separate string literals that
 * the compiler joins, so the first half must end in a space.
 */
66 check_personality(struct utsname *name, char *const *argv, char *const *envp)
68 /* KLUDGE: Disable memory randomization on new Linux kernels
69 * by setting a personality flag and re-executing. (We need
70 * to re-execute, since the memory maps that can conflict with
71 * the CMUCL spaces have already been done at this point).
73 * Since randomization is currently implemented only on x86 kernels,
74 * don't do this trick on other platforms.
76 #if defined(__i386) || defined(__x86_64)
77 int major_version, minor_version, patch_version;
80 major_version = atoi(p);
82 minor_version = atoi(p);
84 patch_version = atoi(p);
86 if ((major_version == 2
87 /* Some old kernels will apparently lose unsupported personality flags
89 && ((minor_version == 6 && patch_version >= 11)
90 || (minor_version > 6)
91 /* This is what RHEL 3 reports */
92 || (minor_version == 4 && patch_version > 20)))
93 || major_version >= 3)
95 int pers = personality(CURRENT_PERSONALITY);
96 if (!(pers & ADDR_NO_RANDOMIZE)) {
97 int retval = personality(pers | ADDR_NO_RANDOMIZE);
98 /* Allegedly some Linux kernels (the reported case was
99 * "hardened Linux 2.6.7") won't set the new personality,
100 * but nor will they return -1 for an error. So as a
101 * workaround query the new personality...
103 int newpers = personality(CURRENT_PERSONALITY);
104 /* ... and don't re-execute if either the setting resulted
105 * in an error or if the value didn't change. Otherwise
106 * this might result in an infinite loop.
108 if (retval != -1 && newpers != pers) {
109 /* Use /proc/self/exe instead of trying to figure out
110 * the executable path from PATH and argv[0], since
111 * that's unreliable. We follow the symlink instead of
112 * executing the file directly in order to prevent top
113 * from displaying the name of the process as "exe". */
114 char runtime[PATH_MAX+1];
115 int i = readlink("/proc/self/exe", runtime, PATH_MAX);
118 execve(runtime, argv, envp);
121 /* Either changing the personality or execve() failed. Either
122 * way we might as well continue, and hope that the random
123 * memory maps are ok this time around.
125 fprintf(stderr, "WARNING: Couldn't re-execute CMUCL with the proper personality flags "
126 "(maybe /proc isn't mounted?). Trying to continue anyway.\n");
133 * Check personality here, before we start processing command line
134 * args. (Previously it was done in os_init.) check_personality
135 * can re-exec us, so we end up parsing the command line args
136 * twice. Not usually a problem unless the processing causes
137 * output, which can be confusing.
 *
 * os_init0 passes &name plus argv and envp (both cast to char *const *)
 * to check_personality().  NOTE(review): name is presumably a struct
 * utsname filled in by uname() on a line not visible in this listing;
 * confirm.
141 os_init0(const char *argv[], const char *envp[])
146 check_personality(&name, (char *const *) argv, (char *const *) envp);
150 os_init(const char *argv[], const char *envp[])
156 /* We need this for mmap */
/*
 * Complain when the first character of the kernel release string is
 * below '2'.  NOTE(review): the message goes to stdout via printf() and
 * says "then" where "than" is meant; what happens after the message is
 * not visible in this listing.
 */
158 if (name.release[0] < '2') {
159 printf("Linux version must be later then 2.0.0!\n");
/* Record the system page size in the global os_vm_page_size. */
163 os_vm_page_size = getpagesize();
/*
 * os_sigcontext_reg: return a pointer to the saved copy of a 32-bit x86
 * general register inside the signal context scp.
 *
 * The visible returns are, in order, EAX, ECX, EDX, EBX, ESP, EBP, ESI
 * and EDI.  NOTE(review): the logic mapping offset to each return, and
 * the result for an offset that matches none of them, is not visible in
 * this listing.
 */
168 os_sigcontext_reg(ucontext_t *scp, int offset)
172 return (unsigned long *) &scp->uc_mcontext.gregs[REG_EAX];
174 return (unsigned long *) &scp->uc_mcontext.gregs[REG_ECX];
176 return (unsigned long *) &scp->uc_mcontext.gregs[REG_EDX];
178 return (unsigned long *) &scp->uc_mcontext.gregs[REG_EBX];
180 return (unsigned long *) &scp->uc_mcontext.gregs[REG_ESP];
182 return (unsigned long *) &scp->uc_mcontext.gregs[REG_EBP];
184 return (unsigned long *) &scp->uc_mcontext.gregs[REG_ESI];
186 return (unsigned long *) &scp->uc_mcontext.gregs[REG_EDI];
/* os_sigcontext_pc: return a pointer to the saved EIP slot of the signal context scp. */
192 os_sigcontext_pc(ucontext_t *scp)
194 return (unsigned long *) &scp->uc_mcontext.gregs[REG_EIP];
/*
 * os_sigcontext_fpu_reg: locate the saved contents of a floating-point
 * register in the signal context scp.
 *
 * Two locations are visible: the x87 slot fpregs->_st[offset], and the
 * SSE slot fpstate->_xmm[offset - 8], the latter only when
 * fpstate->magic is not 0xffff.  reg starts out as NULL.
 * NOTE(review): the test choosing between the two cases (presumably
 * offset < 8) and the return statement are not visible in this listing.
 */
198 os_sigcontext_fpu_reg(ucontext_t *scp, int offset)
200 fpregset_t fpregs = scp->uc_mcontext.fpregs;
201 unsigned char *reg = NULL;
205 reg = (unsigned char *) &fpregs->_st[offset];
209 struct _fpstate *fpstate;
210 fpstate = (struct _fpstate*) scp->uc_mcontext.fpregs;
211 if (fpstate->magic != 0xffff) {
212 reg = (unsigned char *) &fpstate->_xmm[offset - 8];
/*
 * os_sigcontext_fpu_modes: build a combined "modes" word from the
 * floating-point state saved in the signal context scp.
 *
 * The low six bits of the x87 control word land in bits 7-12 and the low
 * six bits of the status word in bits 0-5.  When fpu_mode is SSE2 the
 * saved MXCSR is read as well, and as the last visible step bits 7-12 of
 * the result are inverted.
 * NOTE(review): the handling of a NULL fpregs pointer, of
 * fpstate->magic == 0xffff, and the way mxcsr is merged into modes are
 * on lines not visible in this listing.
 */
221 os_sigcontext_fpu_modes(ucontext_t *scp)
224 unsigned short cw, sw;
226 if (scp->uc_mcontext.fpregs == NULL) {
230 cw = scp->uc_mcontext.fpregs->cw & 0xffff;
231 sw = scp->uc_mcontext.fpregs->sw & 0xffff;
234 modes = ((cw & 0x3f) << 7) | (sw & 0x3f);
238 * Add in the SSE2 part, if we're running the sse2 core.
240 if (fpu_mode == SSE2) {
241 struct _fpstate *fpstate;
244 fpstate = (struct _fpstate*) scp->uc_mcontext.fpregs;
245 if (fpstate->magic == 0xffff) {
248 mxcsr = fpstate->mxcsr;
249 DPRINTF(0, (stderr, "SSE2 modes = %08lx\n", mxcsr));
256 modes ^= (0x3f << 7);
/*
 * sc_reg: return a pointer to the saved copy of an x86-64 general
 * register inside the signal context c.
 *
 * The visible returns are, in order, RAX, RCX, RDX, RBX, RSP, RBP, RSI,
 * RDI and R8 through R15.  NOTE(review): the logic mapping offset to
 * each return, and the result for an unmatched offset, is not visible in
 * this listing.
 */
263 sc_reg(ucontext_t *c, int offset)
267 return &c->uc_mcontext.gregs[REG_RAX];
269 return &c->uc_mcontext.gregs[REG_RCX];
271 return &c->uc_mcontext.gregs[REG_RDX];
273 return &c->uc_mcontext.gregs[REG_RBX];
275 return &c->uc_mcontext.gregs[REG_RSP];
277 return &c->uc_mcontext.gregs[REG_RBP];
279 return &c->uc_mcontext.gregs[REG_RSI];
281 return &c->uc_mcontext.gregs[REG_RDI];
283 return &c->uc_mcontext.gregs[REG_R8];
285 return &c->uc_mcontext.gregs[REG_R9];
287 return &c->uc_mcontext.gregs[REG_R10];
289 return &c->uc_mcontext.gregs[REG_R11];
291 return &c->uc_mcontext.gregs[REG_R12];
293 return &c->uc_mcontext.gregs[REG_R13];
295 return &c->uc_mcontext.gregs[REG_R14];
297 return &c->uc_mcontext.gregs[REG_R15];
/*
 * os_validate: map len bytes of private, anonymous, no-reserve memory,
 * using addr as the address argument to mmap() and OS_VM_PROT_ALL as the
 * protection.
 *
 * A result equal to (os_vm_address_t) -1 is treated as failure.
 * NOTE(review): any adjustment of flags before the mmap() call, and the
 * values returned on success and failure, are on lines not visible in
 * this listing.
 */
304 os_validate(os_vm_address_t addr, os_vm_size_t len)
306 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
311 addr = mmap(addr, len, OS_VM_PROT_ALL, flags, -1, 0);
313 if (addr == (os_vm_address_t) - 1) {
/*
 * os_invalidate: unmap the len bytes starting at addr with munmap().
 *
 * NOTE(review): the debug message prints len with %d; confirm that this
 * matches the underlying type of os_vm_size_t.  The action taken when
 * munmap() returns -1 is not visible in this listing.
 */
322 os_invalidate(os_vm_address_t addr, os_vm_size_t len)
324 DPRINTF(0, (stderr, "os_invalidate %p %d\n", addr, len));
326 if (munmap(addr, len) == -1)
/*
 * os_map: map len bytes of file descriptor fd, starting at file offset
 * offset, privately and at exactly addr (MAP_FIXED).
 *
 * A result equal to (os_vm_address_t) -1 is treated as failure.
 * NOTE(review): offset is an int, so file offsets above INT_MAX cannot
 * be expressed.  The protection argument and the return statements are
 * on lines not visible in this listing.
 */
331 os_map(int fd, int offset, os_vm_address_t addr, os_vm_size_t len)
333 addr = mmap(addr, len,
335 MAP_PRIVATE | MAP_FILE | MAP_FIXED, fd, (off_t) offset);
337 if (addr == (os_vm_address_t) - 1)
/*
 * os_flush_icache: takes an address and a length.
 * NOTE(review): presumably an instruction-cache flush hook for that
 * range; the body is not visible in this listing.
 */
344 os_flush_icache(os_vm_address_t address, os_vm_size_t length)
/*
 * os_protect: apply protection prot to the length bytes starting at
 * address by calling mprotect().
 * NOTE(review): the action taken when mprotect() returns -1 is not
 * visible in this listing.
 */
349 os_protect(os_vm_address_t address, os_vm_size_t length, os_vm_prot_t prot)
351 if (mprotect(address, length, prot) == -1)
/*
 * in_range_p: report whether address a lies in the half-open range
 * [sbeg, sbeg + slen).
 *
 * All three values are converted to char * so the comparison is done in
 * bytes.  Returns nonzero when a is inside the range, zero otherwise.
 */
358 in_range_p(os_vm_address_t a, lispobj sbeg, size_t slen)
360 char *beg = (char *) sbeg;
361 char *end = (char *) sbeg + slen;
362 char *adr = (char *) a;
364 return (adr >= beg && adr < end);
/*
 * valid_addr: test whether addr falls inside one of the Lisp spaces:
 * read-only, static, dynamic 0, dynamic 1, control stack or binding
 * stack.
 *
 * NOTE(review): newaddr (addr truncated to a page boundary) is computed,
 * but every visible range test uses addr itself; confirm whether newaddr
 * is used on lines not visible here or is simply dead.  The values
 * returned are likewise not visible in this listing.
 */
368 valid_addr(os_vm_address_t addr)
370 os_vm_address_t newaddr;
372 newaddr = os_trunc_to_page(addr);
374 if (in_range_p(addr, READ_ONLY_SPACE_START, read_only_space_size)
375 || in_range_p(addr, STATIC_SPACE_START, static_space_size)
376 || in_range_p(addr, DYNAMIC_0_SPACE_START, dynamic_space_size)
377 || in_range_p(addr, DYNAMIC_1_SPACE_START, dynamic_space_size)
378 || in_range_p(addr, CONTROL_STACK_START, control_stack_size)
379 || in_range_p(addr, BINDING_STACK_START, binding_stack_size))
/* sigsegv_handle_now: forward the signal, its code and its context to interrupt_handle_now(). */
388 sigsegv_handle_now(HANDLER_ARGS)
390 interrupt_handle_now(signal, code, context);
/*
 * Copies of a SIGSEGV's arguments, read by sigsegv_handler_tramp() when
 * it calls sigsegv_handle_now().
 */
393 static int tramp_signal;
394 static siginfo_t tramp_code;
395 static ucontext_t tramp_context;
/*
 * sigsegv_handler_tramp: call sigsegv_handle_now() with the signal number,
 * siginfo and context held in the tramp_* file-scope variables.
 */
398 sigsegv_handler_tramp(void)
400 sigsegv_handle_now(tramp_signal, &tramp_code, &tramp_context);
/*
 * sigsegv_handler (x86 / x86-64 variant): SIGSEGV entry point.
 *
 * The faulting address is taken from the cr2 field of the machine
 * context and offered to os_control_stack_overflow(); code->si_addr is
 * offered to gc_write_barrier().  The visible tail either stores the
 * signal number and context in the tramp_* variables and points the
 * saved PC at sigsegv_handler_tramp, or calls sigsegv_handle_now()
 * directly.
 * NOTE(review): the statements following each if, the preprocessor
 * conditionals selecting between the two tails, and the assignment of
 * tramp_code are on lines not visible in this listing.
 */
405 sigsegv_handler(HANDLER_ARGS)
407 os_context_t *os_context = (os_context_t *) context;
/* Pointer-sized integer: an int would truncate the address where pointers are wider than int. */
408 unsigned long fault_addr = os_context->uc_mcontext.cr2;
411 if (os_control_stack_overflow((void *) fault_addr, os_context))
414 if (gc_write_barrier(code->si_addr))
416 #if defined(__x86_64)
417 DPRINTF(0, (stderr, "sigsegv: rip: %p\n", os_context->uc_mcontext.gregs[REG_RIP]));
419 DPRINTF(0, (stderr, "sigsegv: eip: %x\n", os_context->uc_mcontext.gregs[REG_EIP]));
424 /* Switch back to the normal stack and invoke the Lisp signal
425 handler there. Global variables are used to pass the context
426 to the other stack. */
427 tramp_signal = signal;
429 tramp_context = *os_context;
430 SC_PC(os_context) = (unsigned long) sigsegv_handler_tramp;
435 sigsegv_handle_now(signal, code, os_context);
/*
 * sigsegv_handler (variant reading context->sc_regs[reg_ALLOC]): SIGSEGV
 * entry point.
 *
 * With addr = arch_get_bad_addr(signal, code, context):
 *   - if addr is non-NULL and bit 63 of the reg_ALLOC register is set,
 *     that bit is cleared and interrupt_handle_pending() is called;
 *   - else if addr lies strictly between the top of the control stack
 *     and BINDING_STACK_START, a possible stack overflow is reported and
 *     current_control_frame_pointer is stepped down one lispobj at a
 *     time until the value it points at lies within the control stack;
 *   - else, unless interrupt_maybe_gc() handles the fault, it is passed
 *     to interrupt_handle_now().
 *
 * The bit mask is written 1UL << 63 because shifting a plain int by 63
 * is undefined.  The frame pointer is updated by assignment rather than
 * through a cast used as an lvalue, which ISO C does not allow, and addr
 * is converted to unsigned long to match the %lX conversion.
 * NOTE(review): the two-argument interrupt_handle_now() call near the
 * top is presumably inside a disabled conditional section; the
 * surrounding preprocessor lines are not visible in this listing.
 */
439 sigsegv_handler(HANDLER_ARGS)
441 os_vm_address_t addr;
443 DPRINTF(0, (stderr, "sigsegv\n"));
445 interrupt_handle_now(signal, contextstruct);
447 #define CONTROL_STACK_TOP (((char*) CONTROL_STACK_START) + control_stack_size)
449 addr = arch_get_bad_addr(signal, code, context);
451 if (addr != NULL && context->sc_regs[reg_ALLOC] & (1UL << 63)) {
452 context->sc_regs[reg_ALLOC] -= (1UL << 63);
453 interrupt_handle_pending(context);
454 } else if (addr > CONTROL_STACK_TOP && addr < BINDING_STACK_START) {
455 fprintf(stderr, "Possible stack overflow at 0x%08lX!\n", (unsigned long) addr);
456 /* try to fix control frame pointer */
457 while (!(CONTROL_STACK_START <= *current_control_frame_pointer &&
458 *current_control_frame_pointer <= CONTROL_STACK_TOP))
459 current_control_frame_pointer = (void *) ((char *) current_control_frame_pointer - sizeof(lispobj));
461 } else if (!interrupt_maybe_gc(signal, code, context))
462 interrupt_handle_now(signal, code, context);
/* sigbus_handler: emit a debug trace and pass the signal on to interrupt_handle_now(). */
468 sigbus_handler(HANDLER_ARGS)
470 DPRINTF(1, (stderr, "sigbus:\n")); /* there is no sigbus in linux??? */
471 interrupt_handle_now(signal, code, context);
/*
 * os_install_interrupt_handlers: register sigsegv_handler for SIGSEGV and
 * sigbus_handler for SIGBUS through interrupt_install_low_level_handler().
 */
475 os_install_interrupt_handlers(void)
477 interrupt_install_low_level_handler(SIGSEGV, sigsegv_handler);
478 interrupt_install_low_level_handler(SIGBUS, sigbus_handler);
481 /* Some symbols, most notably stat and lstat, don't appear at all in
482 the glibc .so files as a result of preprocessor and linker magic /
483 braindamage. So, try falling back to a stub in linux-stubs.S that
484 will call the proper function if it's one of those. */
/*
 * dlsym_fallback: look up "PVE_stub_" followed by name in handle.
 *
 * NOTE(review): the name is appended with strcpy()/strcat() and no
 * length check; the declaration of newsym is not visible in this
 * listing, so confirm its size covers the longest symbol name callers
 * can pass.  The condition guarding the dlerror() output and the return
 * statement are likewise not visible.
 */
487 dlsym_fallback(void *handle, const char *name)
492 strcpy(newsym, "PVE_stub_");
493 strcat(newsym, name);
494 sym_addr = dlsym(handle, newsym);
497 fputs(dlerror(), stderr);
/*
 * os_dlsym: resolve sym_name to an address.
 *
 * A handle for the running program, obtained with dlopen() on a null
 * file name, is kept in a function-local static.  When lib_list is not
 * NIL it is walked as a Lisp list: the car of each element is itself a
 * cons whose car is a SAP, and the SAP's pointer is used as the handle
 * for dlsym().  The symbol is also looked up in the program handle; if
 * that yields nothing and dlerror() reports an error, the result of
 * dlsym_fallback() is returned.
 * NOTE(review): the condition guarding the dlopen() call, the handling
 * of a successful lookup inside the loop, and the final return are on
 * lines not visible in this listing.
 */
504 os_dlsym(const char *sym_name, lispobj lib_list)
506 static void *program_handle;
510 program_handle = dlopen((void *) 0, RTLD_LAZY | RTLD_GLOBAL);
511 if (lib_list != NIL) {
512 lispobj lib_list_head;
514 for (lib_list_head = lib_list;
515 lib_list_head != NIL; lib_list_head = (CONS(lib_list_head))->cdr) {
516 struct cons *lib_cons = CONS(CONS(lib_list_head)->car);
517 struct sap *dlhandle = (struct sap *) PTR(lib_cons->car);
519 sym_addr = dlsym((void *) dlhandle->pointer, sym_name);
524 sym_addr = dlsym(program_handle, sym_name);
525 if (!sym_addr && dlerror()) {
526 return dlsym_fallback(program_handle, sym_name);
533 restore_fpu(ucontext_t *context)
535 if (context->uc_mcontext.fpregs) {
536 short cw = context->uc_mcontext.fpregs->cw;
537 DPRINTF(0, (stderr, "restore_fpu: cw = %08x\n", cw));
538 __asm__ __volatile__ ("fldcw %0" : : "m" (*&cw));
540 if (fpu_mode == SSE2) {
541 struct _fpstate *fpstate;
544 fpstate = (struct _fpstate*) context->uc_mcontext.fpregs;
545 if (fpstate->magic != 0xffff) {
546 mxcsr = fpstate->mxcsr;
547 DPRINTF(0, (stderr, "restore_fpu: mxcsr (raw) = %04x\n", mxcsr));
548 __asm__ __volatile__ ("ldmxcsr %0" :: "m" (*&mxcsr));