LLVM’s atomicrmw instruction and x86_64

A grimoire of how LLVM translates atomicrmw to x86_64 assembly.

Introduction

I’ve been working on an LLVM-IR-to-x86_64-assembly compiler recently. One of the instructions my compiler translates is atomicrmw, which atomically reads a value from memory, applies an operation to it, writes the result back, and yields the original value. I tested the outputs for all of the operations except the floating-point ones, because I’ve yet to implement floating-point support. The results are listed below. Note that the generated assembly can vary depending on whether the result of the operation is used.
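For a rough sense of where such instructions come from, here is a minimal C11 sketch (my own illustration, not output from any particular compiler) of code that clang would typically lower through atomicrmw add. Whether the fetched value is used or discarded is exactly what decides between the two assembly forms listed per operation below.

#include <stdatomic.h>
#include <stdint.h>

uint64_t fetch_add_used(_Atomic uint64_t *p, uint64_t v) {
    // Old value is needed, so the lowering must use an instruction
    // that returns it (lock xaddq in the listings below).
    return atomic_fetch_add_explicit(p, v, memory_order_acq_rel);
}

void fetch_add_unused(_Atomic uint64_t *p, uint64_t v) {
    // Old value is discarded, so a plain lock addq is enough.
    atomic_fetch_add_explicit(p, v, memory_order_acq_rel);
}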

Full IR example

define void @atomic(i64 %0, i64* %1) align 2 {
    %3 = alloca i64, align 8
    %4 = atomicrmw volatile umin i64* %1, i64 %0 acq_rel, align 8
    store i64 %4, i64* %1, align 8
    ret void
}

The xchg operation

%3 = atomicrmw volatile xchg i64* %2, i64 %0 acq_rel, align 8

# Used or unused result
xchgq  %rdi, (%rsi)
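No lock prefix is needed here: an xchg with a memory operand is implicitly locked on x86. In C11 terms the operation is an atomic exchange; a minimal sketch (illustrative only):

#include <stdatomic.h>
#include <stdint.h>

uint64_t exchange(_Atomic uint64_t *p, uint64_t v) {
    // Swap in the new value and return the old one; xchg with a memory
    // operand is atomic on x86 even without an explicit lock prefix.
    return atomic_exchange_explicit(p, v, memory_order_acq_rel);
}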

The add operation

%3 = atomicrmw volatile add i64* %2, i64 %0 acq_rel, align 8

# Used result
lock  xaddq  %rdi, -8(%rsp)

# Unused result
lock  addq  %rdi, -8(%rsp)

The sub operation

%3 = atomicrmw volatile sub i64* %2, i64 %0 acq_rel, align 8

# Used result
negq  %rax
lock  xaddq  %rax, (%rsi)

# Unused result
negq  %rax
lock  addq  %rax, (%rsi)

The and operation

%3 = atomicrmw volatile and i64* %2, i64 %0 acq_rel, align 8

# Used result
.LBB0_1:
movq  %rax, %rcx
andq  %rdi, %rcx
lock  cmpxchgq  %rcx, (%rsi)
jne   .LBB0_1

# Unused result
lock  andq  %rdi, -8(%rsp)
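The used-result loop above is the pattern LLVM falls back on whenever no single lock-prefixed instruction can both apply the operation and hand back the old value: read the current value, compute the new one, and retry with cmpxchg until no other thread has raced in between. A C11 sketch of the same retry loop (illustrative only, not compiler output):

#include <stdatomic.h>
#include <stdint.h>

uint64_t fetch_and(_Atomic uint64_t *p, uint64_t v) {
    uint64_t old = atomic_load_explicit(p, memory_order_relaxed);
    uint64_t desired;
    do {
        desired = old & v;                    // andq  %rdi, %rcx
    } while (!atomic_compare_exchange_weak_explicit(
                 p, &old, desired,            // lock  cmpxchgq  %rcx, (%rsi)
                 memory_order_acq_rel, memory_order_relaxed));
    return old;                               // old value ends up in %rax
}

The nand, or, xor, max, min, umax, and umin loops below all follow this same shape; only the computation of the new value changes.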

The nand operation

%3 = atomicrmw volatile nand i64* %2, i64 %0 acq_rel, align 8

# Used or unused result
.LBB0_1:
movq  %rax, %rcx
andq  %rdi, %rcx
notq  %rcx
lock  cmpxchgq  %rcx, (%rsi)
jne   .LBB0_1

The or operation

%3 = atomicrmw volatile or i64* %2, i64 %0 acq_rel, align 8

# Used result
.LBB0_1:
movq  %rax, %rcx
orq   %rdi, %rcx
lock  cmpxchgq  %rcx, (%rsi)
jne   .LBB0_1

# Unused result
lock  orq  %rdi, (%rsi)

The xor operation

%3 = atomicrmw volatile xor i64* %2, i64 %0 acq_rel, align 8

# Used result
.LBB0_1:
movq  %rax, %rcx
xorq  %rdi, %rcx
lock  cmpxchgq  %rcx, (%rsi)
jne   .LBB0_1

# Unused result
lock  xorq  %rdi, (%rsi)

The max operation

%3 = atomicrmw volatile max i64* %2, i64 %0 acq_rel, align 8

# Used or unused result
.LBB0_1:
cmpq    %rdi, %rax
movq    %rdi, %rcx
cmovgq  %rax, %rcx
lock    cmpxchgq  %rcx, (%rsi)
jne     .LBB0_1

The min operation

%3 = atomicrmw volatile min i64* %2, i64 %0 acq_rel, align 8

# Used or unused result
.LBB0_1:
cmpq     %rdi, %rax
movq     %rdi, %rcx
cmovleq  %rax, %rcx
lock     cmpxchgq  %rcx, (%rsi)
jne      .LBB0_1

The umax operation

%3 = atomicrmw volatile umax i64* %2, i64 %0 acq_rel, align 8

# Used or unused result
.LBB0_1:
cmpq    %rdi, %rax
movq    %rdi, %rcx
cmovaq  %rax, %rcx
lock    cmpxchgq  %rcx, (%rsi)
jne     .LBB0_1

The umin operation

%3 = atomicrmw volatile umin i64* %2, i64 %0 acq_rel, align 8

# Used or unused result
.LBB0_1:
cmpq     %rdi, %rax
movq     %rdi, %rcx
cmovbeq  %rax, %rcx
lock     cmpxchgq  %rcx, (%rsi)
jne      .LBB0_1
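All four min/max variants reuse that same compare-exchange retry loop; the only difference is the condition used to pick the new value, which is exactly what shows up in the cmov condition code. A C11 sketch of the signed-max case (illustrative only), with the other three conditions noted in the comment:

#include <stdatomic.h>
#include <stdint.h>

// max  : old >  v (signed)    -> cmovgq
// min  : old <= v (signed)    -> cmovleq
// umax : old >  v (unsigned)  -> cmovaq
// umin : old <= v (unsigned)  -> cmovbeq
int64_t fetch_max(_Atomic int64_t *p, int64_t v) {
    int64_t old = atomic_load_explicit(p, memory_order_relaxed);
    int64_t desired;
    do {
        desired = old > v ? old : v;          // cmpq/cmovgq pick the larger value
    } while (!atomic_compare_exchange_weak_explicit(
                 p, &old, desired,
                 memory_order_acq_rel, memory_order_relaxed));
    return old;
}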