Alexander Valitov wrote:
Hi,
I found the source of problem. It is GCC bug. The bug is DSE (dead storage elimination) related. The issue is valid for GCC 4.3.x, 4.4.x, 4.5.x. In 4.2.x and earlier problem does not exist.
I was wrong. It's not GCC bug. "GCC guys" told that problem is in l4sys package. Please see file: /l4/pkg/l4sys/include/ARCH-x86/L4API-l4v2/ipc-l42-gcc3-nopic.h here l4_ipc_send_tag() misses "memory" constraint in clobber list. It definitely must have one. Please fix it.
At the same time following file contains correct l4_ipc_send_tag() implementation: /l4/pkg/l4sys/include/ARCH-x86/L4API-l4v2/ipc-l42-gcc3-nopic.h So i guess the bug is just oversight. Anyway I think it's wise to look through l4sys for similar errors.
Best Regards, Alexander Valitov
And, by the way, we even got a patch with this and a few other missing asm hints added. It mostly covers ia32 code, and not all packages, but these are the cases I was able to locate quickly, and this potentially might save someone some gray hair.
---
From 8b3d54063ac591f3cee29ccdf6007b3e41d54138 Mon Sep 17 00:00:00 2001 From: Alexey Zaytsev zaytsev@altell.ru Date: Mon, 27 Apr 2009 17:06:08 +0400 Subject: [PATCH] Add missing memory access hints for inline asm.
Fixes at leas one manifested bug in ipc-l42-gcc3-nopic.h, probably more hidden ones.
Signed-off-by: Alexey Zaytsev zaytsev@altell.ru --- .../server/src/ARCH-amd64/boot32/boot_cpu.c | 2 +- pkg/bootstrap/server/src/startup.cc | 6 +++- pkg/l4con/server/src/main.c | 3 +- pkg/l4con/server/src/pslim.c | 3 +- pkg/l4io/server/lib-pci/src/arch-i386/pci-pc.c | 27 +++++++++++++------ .../include/ARCH-amd64/L4API-l4v2/ipc-l42-gcc3.h | 2 +- pkg/l4sys/include/ARCH-amd64/L4API-l4v2/kdebug.h | 2 +- .../ARCH-x86/L4API-l4v2/ipc-l42-gcc3-nopic.h | 8 ++++++ pkg/l4sys/include/ARCH-x86/L4API-l4v2/ktrace.h | 8 ++++- pkg/l4sys/include/ARCH-x86/L4API-l4v2/segment.h | 4 ++- pkg/l4util/include/ARCH-amd64/idt.h | 2 +- pkg/l4util/include/ARCH-amd64/perform.h | 6 ++-- pkg/l4util/include/ARCH-x86/perform.h | 6 ++-- 13 files changed, 53 insertions(+), 26 deletions(-)
diff --git a/pkg/bootstrap/server/src/ARCH-amd64/boot32/boot_cpu.c b/pkg/bootstrap/server/src/ARCH-amd64/boot32/boot_cpu.c index 863cb17..07f4828 100644 --- a/pkg/bootstrap/server/src/ARCH-amd64/boot32/boot_cpu.c +++ b/pkg/bootstrap/server/src/ARCH-amd64/boot32/boot_cpu.c @@ -382,7 +382,7 @@ base_gdt_load(void) set_gdt(&pdesc);
/* Reload all the segment registers from the new GDT. */ - asm volatile("ljmp %0,$1f ; 1:" : : "i" (KERNEL_CS)); + asm volatile("ljmp %0,$1f ; 1:" : : "i" (KERNEL_CS) : "memory"); set_ds(KERNEL_DS); set_es(KERNEL_DS); set_ss(KERNEL_DS); diff --git a/pkg/bootstrap/server/src/startup.cc b/pkg/bootstrap/server/src/startup.cc index 541f9e8..33c4e9c 100644 --- a/pkg/bootstrap/server/src/startup.cc +++ b/pkg/bootstrap/server/src/startup.cc @@ -1022,14 +1022,16 @@ startup(l4util_mb_info_t *mbi, l4_umword_t flag, : "a" (L4UTIL_MB_VALID), "b" (&kernel_mbi), "S" (realmode_si), - "r" (boot_info.kernel_start)); + "r" (boot_info.kernel_start) + : "memory");
#elif defined(ARCH_amd64)
asm volatile ("push $exit; jmp *%2" : - : "S" (L4UTIL_MB_VALID), "D" (&kernel_mbi), "r" (boot_info.kernel_start)); + : "S" (L4UTIL_MB_VALID), "D" (&kernel_mbi), "r" (boot_info.kernel_start) + : "memory");
#elif defined(ARCH_arm) typedef void (*startup_func)(void); diff --git a/pkg/l4con/server/src/main.c b/pkg/l4con/server/src/main.c index 5a39e29..1886d38 100644 --- a/pkg/l4con/server/src/main.c +++ b/pkg/l4con/server/src/main.c @@ -82,7 +82,8 @@ fast_memcpy(l4_uint8_t *dst, l4_uint8_t *src, l4_size_t size)
asm volatile ("cld ; rep movsl" :"=S"(dummy), "=D"(dummy), "=c"(dummy) - :"S"(src), "D"(dst),"c"(size/sizeof(l4_umword_t))); + :"S"(src), "D"(dst),"c"(size/sizeof(l4_umword_t)) + : "memory"); #else memcpy(dst, src, size); #endif diff --git a/pkg/l4con/server/src/pslim.c b/pkg/l4con/server/src/pslim.c index 2345d1d..5c499c5 100644 --- a/pkg/l4con/server/src/pslim.c +++ b/pkg/l4con/server/src/pslim.c @@ -524,7 +524,8 @@ _set16(l4_uint8_t *vfb, "dec %%ecx \n\t" "jnz 1b \n\t" : "=c"(dummy), "=d"(dummy) - : "c"(w/4), "S"(pmap), "D"(vfb)); + : "c"(w/4), "S"(pmap), "D"(vfb) + : "memory"); vfb += bwidth; pmap += pwidth; } diff --git a/pkg/l4io/server/lib-pci/src/arch-i386/pci-pc.c b/pkg/l4io/server/lib-pci/src/arch-i386/pci-pc.c index 752040d..616b55f 100644 --- a/pkg/l4io/server/lib-pci/src/arch-i386/pci-pc.c +++ b/pkg/l4io/server/lib-pci/src/arch-i386/pci-pc.c @@ -601,7 +601,8 @@ static unsigned long bios32_service(unsigned long service) "=d" (entry) : "0" (service), "1" (0), - "D" (&bios32_indirect)); + "D" (&bios32_indirect) + : "memory"); __restore_flags(flags);
switch (return_code) { @@ -691,7 +692,8 @@ static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short "c" (device_id), "d" (vendor), "S" ((int) index), - "D" (&pci_indirect)); + "D" (&pci_indirect) + : "memory"); *bus = (bx >> 8) & 0xff; *device_fn = bx & 0xff; return (int) (ret & 0xff00) >> 8; @@ -719,7 +721,8 @@ static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u : "1" (PCIBIOS_READ_CONFIG_BYTE), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect) + : "memory"); break; case 2: __asm__("lcall *(%%esi); cld\n\t" @@ -731,7 +734,8 @@ static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u : "1" (PCIBIOS_READ_CONFIG_WORD), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect) + : "memory"); break; case 4: __asm__("lcall *(%%esi); cld\n\t" @@ -743,7 +747,8 @@ static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u : "1" (PCIBIOS_READ_CONFIG_DWORD), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect) + : "memory"); break; }
@@ -774,7 +779,8 @@ static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, "c" (value), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect) + : "memory"); break; case 2: __asm__("lcall *(%%esi); cld\n\t" @@ -786,7 +792,8 @@ static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, "c" (value), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect) + : "memory"); break; case 4: __asm__("lcall *(%%esi); cld\n\t" @@ -798,7 +805,8 @@ static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, "c" (value), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect) + : "memory"); break; }
@@ -1057,7 +1065,8 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) : "0" (PCIBIOS_SET_PCI_HW_INT), "b" ((dev->bus->number << 8) | dev->devfn), "c" ((irq << 8) | (pin + 10)), - "S" (&pci_indirect)); + "S" (&pci_indirect) + : "memory"); return !(ret & 0xff00); }
diff --git a/pkg/l4sys/include/ARCH-amd64/L4API-l4v2/ipc-l42-gcc3.h b/pkg/l4sys/include/ARCH-amd64/L4API-l4v2/ipc-l42-gcc3.h index 790fdb6..463be31 100644 --- a/pkg/l4sys/include/ARCH-amd64/L4API-l4v2/ipc-l42-gcc3.h +++ b/pkg/l4sys/include/ARCH-amd64/L4API-l4v2/ipc-l42-gcc3.h @@ -132,7 +132,7 @@ l4_ipc_send_tag(l4_threadid_t dest, "S" (dest), "c" (tag.raw) : - "r9", "r10", "r11", "r12", "r13", "r14", "r15" + "r9", "r10", "r11", "r12", "r13", "r14", "r15", "memory" ); return L4_IPC_ERROR(*result); }; diff --git a/pkg/l4sys/include/ARCH-amd64/L4API-l4v2/kdebug.h b/pkg/l4sys/include/ARCH-amd64/L4API-l4v2/kdebug.h index 479288a..87802d4 100644 --- a/pkg/l4sys/include/ARCH-amd64/L4API-l4v2/kdebug.h +++ b/pkg/l4sys/include/ARCH-amd64/L4API-l4v2/kdebug.h @@ -92,7 +92,7 @@ fiasco_register_lines(l4_taskid_t tid, l4_addr_t addr, l4_size_t size) L4_INLINE void fiasco_register_thread_name(l4_threadid_t tid, const char *name) { - asm("int $3; cmpb $30, %%al" : : "a" (name), "c" (3), "S"(tid.raw)); + asm("int $3; cmpb $30, %%al" : : "a" (name), "c" (3), "S"(tid.raw) : "memory"); }
L4_INLINE int diff --git a/pkg/l4sys/include/ARCH-x86/L4API-l4v2/ipc-l42-gcc3-nopic.h b/pkg/l4sys/include/ARCH-x86/L4API-l4v2/ipc-l42-gcc3-nopic.h index 2a5b7ea..e164373 100644 --- a/pkg/l4sys/include/ARCH-x86/L4API-l4v2/ipc-l42-gcc3-nopic.h +++ b/pkg/l4sys/include/ARCH-x86/L4API-l4v2/ipc-l42-gcc3-nopic.h @@ -131,6 +131,8 @@ l4_ipc_send_tag(l4_threadid_t dest, "c" (timeout), "S" (dest.raw), "D" (tag.raw) + : + "memory" ); return L4_IPC_ERROR(*result); } @@ -166,6 +168,8 @@ l4_ipc_wait_tag(l4_threadid_t *src, "a" (L4_IPC_NIL_DESCRIPTOR), "c" (timeout), [msg_desc] "ir"(((int)rcv_msg) | L4_IPC_OPEN_IPC) + : + "memory" ); return L4_IPC_ERROR(*result); } @@ -200,6 +204,8 @@ l4_ipc_wait_next_period(l4_threadid_t *src, "c" (timeout), [msg_desc] "ir"(((int)rcv_msg) | L4_IPC_OPEN_IPC), "D" (L4_IPC_FLAG_NEXT_PERIOD) /* no absolute timeout */ + : + "memory" ); return L4_IPC_ERROR(*result); } @@ -234,6 +240,8 @@ l4_ipc_receive_tag(l4_threadid_t src, "c" (timeout), "S" (src.raw), [msg_desc] "ir"(((int)rcv_msg) & (~L4_IPC_OPEN_IPC)) + : + "memory" ); return L4_IPC_ERROR(*result); } diff --git a/pkg/l4sys/include/ARCH-x86/L4API-l4v2/ktrace.h b/pkg/l4sys/include/ARCH-x86/L4API-l4v2/ktrace.h index 303199c..3a648fa 100644 --- a/pkg/l4sys/include/ARCH-x86/L4API-l4v2/ktrace.h +++ b/pkg/l4sys/include/ARCH-x86/L4API-l4v2/ktrace.h @@ -214,7 +214,9 @@ fiasco_tbuf_log(const char *text) l4_umword_t offset; asm volatile("int $3; cmpb $29, %%al" : "=a" (offset) - : "a" (1), "d" (text)); + : "a" (1), "d" (text) + : "memory" + ); return offset; }
@@ -224,7 +226,9 @@ fiasco_tbuf_log_3val(const char *text, unsigned v1, unsigned v2, unsigned v3) l4_umword_t offset; asm volatile("int $3; cmpb $29, %%al" : "=a" (offset) - : "a" (4), "d" (text), "c" (v1), "S" (v2), "D" (v3)); + : "a" (4), "d" (text), "c" (v1), "S" (v2), "D" (v3) + : "memory" + ); return offset; }
diff --git a/pkg/l4sys/include/ARCH-x86/L4API-l4v2/segment.h b/pkg/l4sys/include/ARCH-x86/L4API-l4v2/segment.h index 10b84a5..56d3393 100644 --- a/pkg/l4sys/include/ARCH-x86/L4API-l4v2/segment.h +++ b/pkg/l4sys/include/ARCH-x86/L4API-l4v2/segment.h @@ -27,7 +27,9 @@ fiasco_gdt_set(void *desc, unsigned int size, "b"(size), "c"(entry_number_start), "d"(0), - "S"(tid.raw)); + "S"(tid.raw) + : "memory" + ); }
#endif /* ! __L4_SYS__ARCH_X86__L4API_L4V2__SEGMENT_H__ */ diff --git a/pkg/l4util/include/ARCH-amd64/idt.h b/pkg/l4util/include/ARCH-amd64/idt.h index 1292f37..8b23825 100644 --- a/pkg/l4util/include/ARCH-amd64/idt.h +++ b/pkg/l4util/include/ARCH-amd64/idt.h @@ -72,7 +72,7 @@ l4util_idt_init(l4util_idt_header_t *idt, int entries) static inline void l4util_idt_load(l4util_idt_header_t *idt) { - asm volatile ("lidt (%%rax) \n\t" : : "a" (idt)); + asm volatile ("lidt (%%rax) \n\t" : : "a" (idt) : "memory"); }
EXTERN_C_END diff --git a/pkg/l4util/include/ARCH-amd64/perform.h b/pkg/l4util/include/ARCH-amd64/perform.h index 81e7731..0145703 100644 --- a/pkg/l4util/include/ARCH-amd64/perform.h +++ b/pkg/l4util/include/ARCH-amd64/perform.h @@ -28,7 +28,7 @@ extern const char*strp6pmc_event(l4_uint32_t event);
/* P5/P6/K7 section */
-/* Makros for access to model specific registers (MSR) */ +/* Macros to access the model specific registers (MSR) */
/* Write the 64-Bit Model Specific Register. First argument is the register, second the 64-Bit value. This can only be called at priviledge level 0. @@ -99,7 +99,7 @@ l4_i586_read_event_counter_long(long long *counter0, long long *counter1) "mov %%rdx, 4(%%rsi)\n" : /* no output */ : "b" (counter0), "S" (counter1) - : "ax", "cx", "dx" + : "ax", "cx", "dx", "memory" ); }
@@ -404,7 +404,7 @@ static inline void l4_i686_select_perfctr0_event(long long *val){ //".byte 0xcc, 0xeb, 0x01, 0x21\n" : /* no output */ : "b" (val) - : "ax", "cx", "dx", "bx" + : "ax", "cx", "dx", "bx", "memory" );
} diff --git a/pkg/l4util/include/ARCH-x86/perform.h b/pkg/l4util/include/ARCH-x86/perform.h index 4b060b1..e00b3ff 100644 --- a/pkg/l4util/include/ARCH-x86/perform.h +++ b/pkg/l4util/include/ARCH-x86/perform.h @@ -99,7 +99,7 @@ l4_i586_read_event_counter_long(long long *counter0, long long *counter1) "movl %%edx, 4(%%esi)\n" : /* no output */ : "b" (counter0), "S" (counter1) - : "ax", "cx", "dx" + : "ax", "cx", "dx", "memory" ); }
@@ -376,7 +376,7 @@ l4_i586_select_event(int event0, int event1) movl %%eax, %0 # Low order 32 bits" \ : "=g" (*(int *)(res_p)), "=g" (*(((int *)res_p)+1)) \ : "g" (cntr) \ - : "ecx", "eax", "edx") + : "ecx", "eax", "edx", "memory")
static inline l4_uint32_t l4_i686_rdpmc_32(int cntr){ l4_uint32_t x; @@ -404,7 +404,7 @@ static inline void l4_i686_select_perfctr0_event(long long *val){ //".byte 0xcc, 0xeb, 0x01, 0x21\n" : /* no output */ : "b" (val) - : "ax", "cx", "dx", "bx" + : "ax", "cx", "dx", "bx", "memory" );
}