/*--------------------------------------------------------------------*/
/*--- Simulation of Local Descriptor Tables     x86-linux/ldt.c   ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2005 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* Details of the LDT simulation
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   When a program runs natively, the Linux kernel allows each *thread*
   in it to have its own LDT.  Almost all programs never do this --
   it's wildly unportable, after all -- and so the kernel never
   allocates the structure, which is just as well as an LDT occupies
   64k of memory (8192 entries of size 8 bytes).

   A thread may choose to modify its LDT entries by doing the
   __NR_modify_ldt syscall.  In such a situation the kernel will then
   allocate an LDT structure for it.  Each LDT entry is basically a
   (base, limit) pair.  A virtual address in a specific segment is
   translated to a linear address by adding the segment's base value.
   In addition, the virtual address must not exceed the limit value.

   To use an LDT entry, a thread loads one of the segment registers
   (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry
   (0 .. 8191) it wants to use.  In fact, the required value is
   (index << 3) + 7, but that's not important right now.  Any normal
   instruction which includes an addressing mode can then be made
   relative to that LDT entry by prefixing the insn with a so-called
   segment-override prefix, a byte which indicates which of the 6
   segment registers holds the LDT index.

   Now, a key constraint is that Valgrind's address checks operate in
   terms of linear addresses.  So we have to explicitly translate
   virtual addrs into linear addrs, and that means doing a complete
   LDT simulation.

   Calls to modify_ldt are intercepted.  For each thread, we maintain
   an LDT (with the same normally-never-allocated optimisation that
   the kernel does).  This is updated as expected via calls to
   modify_ldt.

   When a thread does an amode calculation involving a segment
   override prefix, the relevant LDT entry for the thread is
   consulted.  It all works.

   There is a conceptual problem, which appears when switching back to
   native execution, either temporarily to pass syscalls to the
   kernel, or permanently, when debugging V.  The problem at such
   points is that it's pretty pointless to copy the simulated
   machine's segment registers to the real machine, because we'd also
   need to copy the simulated LDT into the real one, and that's
   prohibitively expensive.  Fortunately it looks like no syscalls
   rely on the segment regs or LDT being correct, so we can get away
   with it.

   Apart from that the simulation is pretty straightforward.  All 6
   segment registers are tracked, although only %ds, %es, %fs and %gs
   are allowed as prefixes.  Perhaps it could be restricted even more
   than that -- I am not sure what is and isn't allowed in user-mode.
*/
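
/* For illustration only, a minimal sketch (kept under #if 0, so it is
   never compiled) of how a client program might install and use an
   LDT entry natively, as described above.  This is a plausible usage
   example rather than code from any real client: the slot index (1)
   and the descriptor settings are arbitrary choices, and since glibc
   provides no wrapper for modify_ldt, it is invoked via syscall(2). */
#if 0
#include <asm/ldt.h>      /* struct user_desc */
#include <sys/syscall.h>  /* __NR_modify_ldt */
#include <unistd.h>       /* syscall */
#include <string.h>       /* memset */

static void install_and_use_ldt_entry ( void* base )
{
   struct user_desc ud;
   unsigned short   sel;

   memset(&ud, 0, sizeof(ud));
   ud.entry_number   = 1;                    /* arbitrary LDT slot */
   ud.base_addr      = (unsigned long)base;
   ud.limit          = 0xfff;                /* 4096 bytes, byte granular */
   ud.seg_32bit      = 1;
   ud.contents       = 0;                    /* data, expand-up */
   ud.read_exec_only = 0;                    /* writable */

   /* func == 1: write one LDT entry.  This is the call Valgrind
      intercepts and routes to write_ldt() below. */
   syscall(__NR_modify_ldt, 1, &ud, sizeof(ud));

   /* Build the selector: (index << 3) + 7, i.e. TI=1 (use the LDT,
      not the GDT) and RPL=3, per the comment above. */
   sel = (1 << 3) | 7;

   /* Load %fs with the selector and do an %fs-relative store: the
      byte lands at linear address base + 0. */
   __asm__ __volatile__ (
      "movw %w0, %%fs\n\t"
      "movb $42, %%fs:0"
      : : "r" (sel) : "memory" );
}
#endif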
#include "core.h"
#include "x86_private.h"
#include "libvex_guest_x86.h"


/* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr,
   using the Linux kernel's logic (cut-n-paste of code in
   linux/kernel/ldt.c). */

static
void translate_to_hw_format ( /* IN  */ vki_modify_ldt_t* inn,
                              /* OUT */ VexGuestX86SegDescr* out,
                                        Int oldmode )
{
   UInt entry_1, entry_2;
   vg_assert(8 == sizeof(VexGuestX86SegDescr));

   if (0)
      VG_(printf)("translate_to_hw_format: base %p, limit %d\n",
                  inn->base_addr, inn->limit );

   /* Allow LDTs to be cleared by the user. */
   if (inn->base_addr == 0 && inn->limit == 0) {
      if (oldmode ||
          (inn->contents        == 0 &&
           inn->read_exec_only  == 1 &&
           inn->seg_32bit       == 0 &&
           inn->limit_in_pages  == 0 &&
           inn->seg_not_present == 1 &&
           inn->useable         == 0 )) {
         entry_1 = 0;
         entry_2 = 0;
         goto install;
      }
   }

   entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
             (inn->limit & 0x0ffff);
   entry_2 = (inn->base_addr & 0xff000000) |
             ((inn->base_addr & 0x00ff0000) >> 16) |
             (inn->limit & 0xf0000) |
             ((inn->read_exec_only ^ 1) << 9) |
             (inn->contents << 10) |
             ((inn->seg_not_present ^ 1) << 15) |
             (inn->seg_32bit << 22) |
             (inn->limit_in_pages << 23) |
             0x7000;
   if (!oldmode)
      entry_2 |= (inn->useable << 20);

   /* Install the new entry ... */
  install:
   out->LdtEnt.Words.word1 = entry_1;
   out->LdtEnt.Words.word2 = entry_2;
}

/*
 * linux/kernel/ldt.c
 *
 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
 * Copyright (C) 1999 Ingo Molnar
 */

/*
 * read_ldt() is not really atomic - this is not a problem since
 * synchronization of reads and writes done to the LDT has to be
 * assured by user-space anyway. Writes are atomic, to protect
 * the security checks done on new descriptors.
 */
static
Int read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
{
   Int    err;
   UInt   i, size;
   UChar* ldt;

   if (0)
      VG_(printf)("read_ldt: tid = %d, ptr = %p, bytecount = %d\n",
                  tid, ptr, bytecount );

   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
   vg_assert(8 == sizeof(VexGuestX86SegDescr));

   ldt = (UChar*)(VG_(threads)[tid].arch.vex.guest_LDT);
   err = 0;
   if (ldt == NULL)
      /* LDT not allocated, meaning all entries are null */
      goto out;

   size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
   if (size > bytecount)
      size = bytecount;

   err = size;
   for (i = 0; i < size; i++)
      ptr[i] = ldt[i];

  out:
   return err;
}


static
Int write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
{
   Int                  error;
   VexGuestX86SegDescr* ldt;
   vki_modify_ldt_t*    ldt_info;

   if (0)
      VG_(printf)("write_ldt: tid = %d, ptr = %p, "
                  "bytecount = %d, oldmode = %d\n",
                  tid, ptr, bytecount, oldmode );

   vg_assert(8 == sizeof(VexGuestX86SegDescr));
   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));

   ldt      = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
   ldt_info = (vki_modify_ldt_t*)ptr;

   error = -VKI_EINVAL;
   if (bytecount != sizeof(vki_modify_ldt_t))
      goto out;

   error = -VKI_EINVAL;
   if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
      goto out;
   if (ldt_info->contents == 3) {
      if (oldmode)
         goto out;
      if (ldt_info->seg_not_present == 0)
         goto out;
   }

   /* If this thread doesn't have an LDT, we'd better allocate it
      now. */
   if (ldt == NULL) {
      ldt = VG_(alloc_zeroed_x86_LDT)();
      VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
   }

   /* Install the new entry ... */
   translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number],
                            oldmode );
   error = 0;

  out:
   return error;
}
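
/* For reference, a small sketch (again under #if 0, never compiled)
   of the inverse of translate_to_hw_format: recovering the base and
   limit from the two packed descriptor words.  decode_hw_format is a
   hypothetical helper for illustration only; the decode Valgrind
   actually performs goes through the LdtEnt.Bits bitfields, as in
   VG_(sys_get_thread_area) below. */
#if 0
static void decode_hw_format ( UInt word1, UInt word2,
                               UInt* base, UInt* limit )
{
   /* word1 holds limit[15:0] in bits 15..0 and base[15:0] in bits
      31..16.  word2 holds base[23:16] in bits 7..0, limit[19:16] in
      bits 19..16 and base[31:24] in bits 31..24; bit 23 is the
      granularity flag (limit counted in 4k pages, not bytes). */
   *base  = (word2 & 0xff000000)
            | ((word2 & 0x000000ff) << 16)
            | (word1 >> 16);
   *limit = (word2 & 0x000f0000) | (word1 & 0xffff);
   if (word2 & (1 << 23))               /* limit_in_pages set? */
      *limit = (*limit << 12) | 0xfff;  /* scale pages to bytes */
}
#endif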

Int VG_(sys_modify_ldt) ( ThreadId tid,
                          Int func, void* ptr, UInt bytecount )
{
   Int ret = -VKI_ENOSYS;

   switch (func) {
   case 0:
      ret = read_ldt(tid, ptr, bytecount);
      break;
   case 1:
      ret = write_ldt(tid, ptr, bytecount, 1);
      break;
   case 2:
      VG_(unimplemented)("sys_modify_ldt: func == 2");
      /* god knows what this is about */
      /* ret = read_default_ldt(ptr, bytecount); */
      /*UNREACHED*/
      break;
   case 0x11:
      ret = write_ldt(tid, ptr, bytecount, 0);
      break;
   }
   return ret;
}


Int VG_(sys_set_thread_area) ( ThreadId tid, vki_modify_ldt_t* info )
{
   Int idx;
   VexGuestX86SegDescr* gdt;

   vg_assert(8 == sizeof(VexGuestX86SegDescr));
   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));

   if (info == NULL)
      return -VKI_EFAULT;

   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;

   /* If the thread doesn't have a GDT, allocate it now. */
   if (!gdt) {
      gdt = VG_(alloc_zeroed_x86_GDT)();
      VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
   }

   idx = info->entry_number;

   if (idx == -1) {
      /* Find and use the first free entry. */
      for (idx = 0; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
         if (gdt[idx].LdtEnt.Words.word1 == 0 &&
             gdt[idx].LdtEnt.Words.word2 == 0)
            break;
      }

      if (idx == VEX_GUEST_X86_GDT_NENT)
         return -VKI_ESRCH;
   } else if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
      return -VKI_EINVAL;
   }

   translate_to_hw_format(info, &gdt[idx], 0);

   VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
             "set_thread_area(info->entry)",
             (Addr) & info->entry_number, sizeof(unsigned int) );
   info->entry_number = idx;
   VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
             (Addr) & info->entry_number, sizeof(unsigned int) );

   return 0;
}


Int VG_(sys_get_thread_area) ( ThreadId tid, vki_modify_ldt_t* info )
{
   Int idx;
   VexGuestX86SegDescr* gdt;

   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
   vg_assert(8 == sizeof(VexGuestX86SegDescr));

   if (info == NULL)
      return -VKI_EFAULT;

   idx = info->entry_number;

   if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
      return -VKI_EINVAL;

   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;

   /* If the thread doesn't have a GDT, allocate it now. */
   if (!gdt) {
      gdt = VG_(alloc_zeroed_x86_GDT)();
      VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
   }

   info->base_addr       = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
                           ( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
                           gdt[idx].LdtEnt.Bits.BaseLow;
   info->limit           = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
                           gdt[idx].LdtEnt.Bits.LimitLow;
   info->seg_32bit       = gdt[idx].LdtEnt.Bits.Default_Big;
   info->contents        = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
   info->read_exec_only  = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
   info->limit_in_pages  = gdt[idx].LdtEnt.Bits.Granularity;
   info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
   info->useable         = gdt[idx].LdtEnt.Bits.Sys;
   info->reserved        = 0;

   return 0;
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/