Fix atomic test_and_set and clear

The __atomic_test_and_set and __atomic_clear code was not correct.
They needed locking.
Replaced the code with assembly code.
The changes are in include/stdatomic.h, lib/stdatomic.c, lib/atomic.S
Enabled tests/tests2/124_atomic_counter.c for apple again.

Also moved lib/fetch_and_add.S code to lib/atomic.S.
Removed lib/fetch_and_add.S
Adjusted lib/Makefile
herman ten brugge 2022-11-17 05:57:21 -06:00
parent c8ef84c854
commit f89a6f12a7
6 changed files with 259 additions and 107 deletions

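For context, the atomic_flag test-and-set/clear pair is the primitive behind the classic spin lock, so an implementation that is not truly atomic silently breaks its callers. A minimal C11 sketch of the usage pattern (illustrative only, not code taken from the test suite; counter_inc is an invented name):

#include <stdatomic.h>

static atomic_flag lock = ATOMIC_FLAG_INIT;
static long counter;

static void counter_inc(void)
{
    /* spin until the flag was previously clear, i.e. we now own the lock */
    while (atomic_flag_test_and_set(&lock))
        ;
    ++counter;                  /* critical section */
    atomic_flag_clear(&lock);   /* release the lock */
}
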
include/stdatomic.h

@@ -80,17 +80,12 @@ typedef struct {
 #define ATOMIC_FLAG_INIT {0}
 #define atomic_flag_test_and_set_explicit(object, order) \
-({ bool ret, value = 1; \
-__atomic_exchange(&(object)->value, &value, &ret, order); \
-ret; \
-})
+__atomic_test_and_set((void *)(&((object)->value)), order)
 #define atomic_flag_test_and_set(object) \
 atomic_flag_test_and_set_explicit(object, __ATOMIC_SEQ_CST)
 #define atomic_flag_clear_explicit(object, order) \
-({ bool value = 0; \
-__atomic_store(&(object)->value, &value, order); \
-})
+__atomic_clear((bool *)(&((object)->value)), order)
 #define atomic_flag_clear(object) \
 atomic_flag_clear_explicit(object, __ATOMIC_SEQ_CST)

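The macros above now delegate to the runtime helpers declared in lib/stdatomic.c instead of open-coding __atomic_exchange/__atomic_store on the flag. A rough sketch of what an expansion looks like, assuming tcc's own <stdatomic.h> (whose atomic_flag has a value member); expansion_example is an invented name:

#include <stdatomic.h>
#include <stdbool.h>

/* helper signatures, as declared for the runtime library */
bool __atomic_test_and_set(volatile void *ptr, int memorder);
void __atomic_clear(volatile void *ptr, int memorder);

static atomic_flag f = ATOMIC_FLAG_INIT;

static bool expansion_example(void)
{
    /* atomic_flag_test_and_set(&f) now expands, roughly, to: */
    bool was_set = __atomic_test_and_set((void *)(&(&f)->value), __ATOMIC_SEQ_CST);

    /* atomic_flag_clear(&f) now expands, roughly, to: */
    __atomic_clear((bool *)(&(&f)->value), __ATOMIC_SEQ_CST);

    return was_set;
}
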
lib/Makefile

@@ -42,11 +42,11 @@ $(X)BT_O += tcov.o
 DSO_O = dsohandle.o
-I386_O = libtcc1.o alloca.o alloca-bt.o stdatomic.o $(BT_O)
-X86_64_O = libtcc1.o alloca.o alloca-bt.o stdatomic.o $(BT_O)
-ARM_O = libtcc1.o armeabi.o alloca.o armflush.o fetch_and_add.o stdatomic.o atomic.o $(BT_O)
-ARM64_O = lib-arm64.o fetch_and_add.o stdatomic.o atomic.o $(BT_O)
-RISCV64_O = lib-arm64.o fetch_and_add.o stdatomic.o atomic.o $(BT_O)
+I386_O = libtcc1.o alloca.o alloca-bt.o stdatomic.o atomic.o $(BT_O)
+X86_64_O = libtcc1.o alloca.o alloca-bt.o stdatomic.o atomic.o $(BT_O)
+ARM_O = libtcc1.o armeabi.o alloca.o armflush.o stdatomic.o atomic.o $(BT_O)
+ARM64_O = lib-arm64.o stdatomic.o atomic.o $(BT_O)
+RISCV64_O = lib-arm64.o stdatomic.o atomic.o $(BT_O)
 WIN_O = crt1.o crt1w.o wincrt1.o wincrt1w.o dllcrt1.o dllmain.o
 OBJ-i386 = $(I386_O) $(BCHECK_O) $(DSO_O)

lib/atomic.S

@@ -12,7 +12,49 @@
# define _(s) s
#endif
#if defined __arm__
#if defined __i386__
.text
.align 2
.global _(__atomic_test_and_set)
.type _(__atomic_test_and_set), %function
_(__atomic_test_and_set):
movl 4(%esp), %edx
movl $1, %eax
xchgb (%edx), %al
ret
.size _(__atomic_test_and_set), .-_(__atomic_test_and_set)
.global _(__atomic_clear)
.type _(__atomic_clear), %function
_(__atomic_clear):
movl 4(%esp), %edx
xorl %eax, %eax
xchgb (%edx), %al
ret
.size _(__atomic_clear), .-_(__atomic_clear)
#elif defined __x86_64__
.text
.align 2
.global _(__atomic_test_and_set)
.type _(__atomic_test_and_set), %function
_(__atomic_test_and_set):
movl $1, %eax
xchgb (%rdi), %al
ret
.size _(__atomic_test_and_set), .-_(__atomic_test_and_set)
.global _(__atomic_clear)
.type _(__atomic_clear), %function
_(__atomic_clear):
xorl %eax, %eax
xchgb (%rdi), %al
ret
.size _(__atomic_clear), .-_(__atomic_clear)
#elif defined __arm__
#ifndef __TINYC__
.arch armv6k
@@ -21,6 +63,91 @@
.text
.align 2
.global _(fetch_and_add_arm)
.type _(fetch_and_add_arm), %function
_(fetch_and_add_arm):
mcr p15, #0, r0, c7, c10, #5
.L0:
ldrex r3, [r0]
add r3, r3, r1
strex r2, r3, [r0]
cmp r2, #0
bne .L0
mcr p15, #0, r0, c7, c10, #5
bx lr
.size _(fetch_and_add_arm), .-_(fetch_and_add_arm)
.global _(__atomic_test_and_set)
.type _(__atomic_test_and_set), %function
_(__atomic_test_and_set):
#ifdef __TINYC__
.int 0xe92d4030
.int 0xee070fba
.int 0xe5d03000
.int 0xe24dd014
.int 0xe1a05000
.int 0xe2533000
.int 0xe1a04001
.int 0x13a03001
.int 0xee070fba
.int 0xe5cd300f
.int 0xe3a03001
.int 0xe1a02003
.int 0xe28d100f
.int 0xe1a00005
.int 0xe58d4004
.int 0xe58d4000
.int 0xeb000009
.int 0xe3500000
.int 0x0afffff6
.int 0xe5dd000f
.int 0xe28dd014
.int 0xe8bd8030
#else
push {r4, r5, lr}
mcr p15, 0, r0, c7, c10, 5
ldrb r3, [r0]
sub sp, sp, #20
mov r5, r0
subs r3, r3, #0
mov r4, r1
movne r3, #1
mcr p15, 0, r0, c7, c10, 5
strb r3, [sp, #15]
.L20:
mov r3, #1
mov r2, r3
add r1, sp, #15
mov r0, r5
str r4, [sp, #4]
str r4, [sp]
bl __atomic_compare_exchange_1
cmp r0, #0
beq .L20
ldrb r0, [sp, #15]
add sp, sp, #20
pop {r4, r5, pc}
#endif
.size _(__atomic_test_and_set), .-_(__atomic_test_and_set)
.global _(__atomic_clear)
.type _(__atomic_clear), %function
_(__atomic_clear):
#ifdef __TINYC__
.int 0xe3a03000
.int 0xee070fba
.int 0xe5c03000
.int 0xee070fba
.int 0xe12fff1e
#else
mov r3, #0
mcr p15, 0, r0, c7, c10, 5
strb r3, [r0]
mcr p15, 0, r0, c7, c10, 5
bx lr
#endif
.size _(__atomic_clear), .-_(__atomic_clear)
.global _(__atomic_compare_exchange_1)
.type _(__atomic_compare_exchange_1), %function
_(__atomic_compare_exchange_1):
@@ -141,6 +268,67 @@ _(__atomic_compare_exchange_4):
.text
.align 2
.global _(fetch_and_add_arm64)
.type _(fetch_and_add_arm64), %function
_(fetch_and_add_arm64):
#ifdef __TINYC__
.int 0x885f7c02
.int 0x0b010042
.int 0x8803fc02
.int 0x35ffffa3
.int 0xd5033bbf
.int 0xd65f03c0
#else
ldxr w2, [x0]
add w2, w2, w1
stlxr w3, w2, [x0]
cbnz w3, _(fetch_and_add_arm64)
dmb ish
ret
#endif
.size _(fetch_and_add_arm64), .-_(fetch_and_add_arm64)
.global _(__atomic_test_and_set)
.type _(__atomic_test_and_set), %function
_(__atomic_test_and_set):
#ifdef __TINYC__
.int 0xa9bf7bfd
.int 0xaa0003e1
.int 0x52800020
.int 0x910003fd
.int 0x2a0003f0
.int 0x085ffc20
.int 0x0811fc30
.int 0x35ffffd1
.int 0xa8c17bfd
.int 0xd65f03c0
#else
stp x29, x30, [sp, -16]!
mov x1, x0
mov w0, 1
mov x29, sp
mov w16, w0
.L20:
ldaxrb w0, [x1]
stlxrb w17, w16, [x1]
cbnz w17, .L20
ldp x29, x30, [sp], 16
ret
#endif
.size _(__atomic_test_and_set), .-_(__atomic_test_and_set)
.global _(__atomic_clear)
.type _(__atomic_clear), %function
_(__atomic_clear):
#ifdef __TINYC__
.int 0x089ffc1f
.int 0xd65f03c0
#else
stlrb wzr, [x0]
ret
#endif
.size _(__atomic_clear), .-_(__atomic_clear)
.global _(__atomic_compare_exchange_1)
.type _(__atomic_compare_exchange_1), %function
_(__atomic_compare_exchange_1):
@@ -367,6 +555,63 @@ _(__atomic_compare_exchange_8):
.text
.align 2
.global _(fetch_and_add_riscv64)
.type _(fetch_and_add_riscv64), %function
_(fetch_and_add_riscv64):
#ifdef __TINYC__
.int 0x0f50000f
.int 0x004b5202f
.short 0x8082
#else
fence iorw,ow
amoadd.w.aq zero,a1,0(a0)
ret
#endif
.size _(fetch_and_add_riscv64), .-_(fetch_and_add_riscv64)
.global _(__atomic_test_and_set)
.type _(__atomic_test_and_set), %function
_(__atomic_test_and_set):
#ifdef __TINYC__
.int 0x00357793
.int 0x0037979b
.short 0x4685
.short 0x9971
.int 0x00f696bb
.int 0x0f50000f
.int 0x44d5272f
.int 0x00f7553b
.int 0x0ff57513
.short 0x8082
#else
andi a5,a0,3
slliw a5,a5,3
li a3,1
andi a0,a0,-4
sllw a3,a3,a5
fence iorw,ow; amoor.w.aq a4,a3,0(a0)
srlw a0,a4,a5
andi a0,a0,0xff
ret
#endif
.size _(__atomic_test_and_set), .-_(__atomic_test_and_set)
.global _(__atomic_clear)
.type _(__atomic_clear), %function
_(__atomic_clear):
#ifdef __TINYC__
.int 0x0ff0000f
.int 0x00050023
.int 0x0ff0000f
.short 0x8082
#else
fence iorw,iorw
sb zero,0(a0)
fence iorw,iorw
ret
#endif
.size _(__atomic_clear), .-_(__atomic_clear)
.global _(__atomic_compare_exchange_1)
.type _(__atomic_compare_exchange_1), %function
_(__atomic_compare_exchange_1):

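All of the per-target routines above implement the same contract; the memorder argument is effectively ignored, since x86 gets its ordering from the implicitly locked xchg (xchg with a memory operand always asserts LOCK) and the other targets issue barriers around a load/store-exclusive or AMO sequence. A portable C model of that contract, written as a sketch with GCC/Clang builtins rather than anything tcc compiles itself (the model_* names are invented for this sketch):

#include <stdbool.h>

/* __atomic_test_and_set: atomically set the byte at ptr to 1 and
   report whether it was already set */
bool model_test_and_set(volatile unsigned char *ptr)
{
    return __atomic_exchange_n(ptr, 1, __ATOMIC_SEQ_CST) != 0;
}

/* __atomic_clear: atomically reset the byte at ptr to 0 with (at least)
   release ordering */
void model_clear(volatile unsigned char *ptr)
{
    __atomic_store_n(ptr, 0, __ATOMIC_RELEASE);
}
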
lib/fetch_and_add.S (deleted)

@@ -1,76 +0,0 @@
/* ---------------------------------------------- */
#ifdef __leading_underscore
# define _(s) _##s
#else
# define _(s) s
#endif
.globl _(__bound_alloca)
_(__bound_alloca):
#if defined __arm__
.text
.align 2
.global _(fetch_and_add_arm)
.type _(fetch_and_add_arm), %function
_(fetch_and_add_arm):
mcr p15, #0, r0, c7, c10, #5
.L0:
ldrex r3, [r0]
add r3, r3, r1
strex r2, r3, [r0]
cmp r2, #0
bne .L0
mcr p15, #0, r0, c7, c10, #5
bx lr
.size _(fetch_and_add_arm), .-_(fetch_and_add_arm)
/* ---------------------------------------------- */
#elif defined __aarch64__
.text
.align 2
.global _(fetch_and_add_arm64)
.type _(fetch_and_add_arm64), %function
_(fetch_and_add_arm64):
#ifdef __TINYC__
.int 0x885f7c02
.int 0x0b010042
.int 0x8803fc02
.int 0x35ffffa3
.int 0xd5033bbf
.int 0xd65f03c0
#else
ldxr w2, [x0]
add w2, w2, w1
stlxr w3, w2, [x0]
cbnz w3, _(fetch_and_add_arm64)
dmb ish
ret
#endif
.size _(fetch_and_add_arm64), .-_(fetch_and_add_arm64)
/* ---------------------------------------------- */
#elif defined __riscv
.text
.align 2
.global _(fetch_and_add_riscv64)
.type _(fetch_and_add_riscv64), %function
_(fetch_and_add_riscv64):
#ifdef __TINYC__
.int 0x0f50000f
.int 0x004b5202f
.short 0x8082
#else
fence iorw,ow
amoadd.w.aq zero,a1,0(a0)
ret
#endif
.size _(fetch_and_add_riscv64), .-_(fetch_and_add_riscv64)
/* ---------------------------------------------- */
#endif

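The deleted file only carried the per-target fetch_and_add helpers, which now live in lib/atomic.S (see above). Judging from the assembly, they perform an atomic in-place add of a 32-bit value and, despite the name, discard the old value. A rough C model, using a GCC/Clang builtin purely for illustration (fetch_and_add_model is an invented name; the declaration tcc itself uses may differ):

void fetch_and_add_model(volatile int *p, int v)
{
    /* atomic add with full-barrier semantics; the result is intentionally unused */
    (void)__atomic_add_fetch(p, v, __ATOMIC_SEQ_CST);
}
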
lib/stdatomic.c

@@ -23,7 +23,7 @@ typedef __SIZE_TYPE__ size_t;
 { \
 TYPE rv; \
 TYPE cmp = *(TYPE *)ref; \
-asm volatile( \
+__asm__ volatile( \
 "lock cmpxchg" SUFFIX " %2,%1\n" \
 : "=a" (rv), "+m" (*(TYPE *)atom) \
 : "q" (xchg), "0" (cmp) \
@@ -115,16 +115,6 @@ ATOMIC_GEN(uint32_t, 4, "l")
 ATOMIC_GEN(uint64_t, 8, "q")
 #endif
-bool __atomic_test_and_set (volatile void *ptr, int memorder)
-{
-return __atomic_exchange_1(ptr, 1, memorder);
-}
-void __atomic_clear (volatile void *ptr, int memorder)
-{
-__atomic_store_1(ptr, 0, memorder);
-}
 void __atomic_signal_fence (int memorder)
 {
 }
@@ -132,15 +122,15 @@ void __atomic_signal_fence (int memorder)
 void __atomic_thread_fence (int memorder)
 {
 #if defined __i386__
-asm volatile("lock orl $0, (%esp)");
+__asm__ volatile("lock orl $0, (%esp)");
 #elif defined __x86_64__
-asm volatile("lock orq $0, (%rsp)");
+__asm__ volatile("lock orq $0, (%rsp)");
 #elif defined __arm__
-asm volatile(".int 0xee070fba"); // mcr p15, 0, r0, c7, c10, 5
+__asm__ volatile(".int 0xee070fba"); // mcr p15, 0, r0, c7, c10, 5
 #elif defined __aarch64__
-asm volatile(".int 0xd5033bbf"); // dmb ish
+__asm__ volatile(".int 0xd5033bbf"); // dmb ish
 #elif defined __riscv
-asm volatile(".int 0x0ff0000f"); // fence iorw,iorw
+__asm__ volatile(".int 0x0ff0000f"); // fence iorw,iorw
 #endif
 }

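Two independent cleanups in this file: the reserved spelling __asm__ replaces asm, which is a GNU extension keyword that disappears under strict conformance modes such as gcc -std=c11, and the C fallbacks for __atomic_test_and_set/__atomic_clear are dropped now that lib/atomic.S provides them. The retained x86 fence idiom works because any LOCK-prefixed read-modify-write acts as a full memory barrier; a standalone sketch for gcc/clang (x86_full_fence is an invented name, and the operands are escaped here because extended asm is used, unlike the operand-free statements in tcc's copy):

static inline void x86_full_fence(void)
{
#if defined __i386__
    /* or-ing 0 into the stack slot changes no data, but the LOCK prefix
       makes the access a serializing memory operation */
    __asm__ volatile("lock orl $0, (%%esp)" ::: "memory");
#elif defined __x86_64__
    __asm__ volatile("lock orq $0, (%%rsp)" ::: "memory");
#endif
}
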
tests/tests2/124_atomic_counter.c

@@ -14,11 +14,9 @@
 abort(); \
 } while (0)
-#ifndef __APPLE__
 #if defined __x86_64__ || defined __aarch64__ || defined __riscv
 #define HAS_64BITS
 #endif
-#endif
 typedef struct {
 atomic_flag flag;
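
The removed #ifndef __APPLE__ guard had kept HAS_64BITS undefined on macOS, which presumably disabled the 64-bit part of the counter test there; with the primitives fixed, the same commit re-enables it. Purely as an illustration of the kind of code such a guard typically gates (the names below are hypothetical, not taken from the test):

#include <stdatomic.h>

#ifdef HAS_64BITS
/* hypothetical 64-bit counter exercised only when HAS_64BITS is defined */
static _Atomic long long counter64;

static void counter64_inc(void)
{
    atomic_fetch_add(&counter64, 1);
}
#endif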