[4/3] setjmp: optimize longjmp prologues

Submitted by Alexander Monakov on Aug. 12, 2020, 11:34 a.m.

Details

Message ID 20200812113430.9254-1-amonakov@ispras.ru
State New
Series "Series without cover letter"
Headers show

Commit Message

Alexander Monakov Aug. 12, 2020, 11:34 a.m.
Use a branchless sequence that is one byte shorter on 64-bit, same size
on 32-bit. Thanks to Pete Cawley for suggesting this variant.
---

I'm sending a revised patch after Pete Cawley (@corsix) suggested a
preferable variant on Twitter. A similar cmp-adc combo can be used to
replace the branchy sequence in the i386 longjmp code.

 src/setjmp/i386/longjmp.s   | 6 ++----
 src/setjmp/x32/longjmp.s    | 8 +++-----
 src/setjmp/x86_64/longjmp.s | 8 +++-----
 3 files changed, 8 insertions(+), 14 deletions(-)

Patch hide | download patch | download mbox

diff --git a/src/setjmp/i386/longjmp.s b/src/setjmp/i386/longjmp.s
index b429f135..8188f06b 100644
--- a/src/setjmp/i386/longjmp.s
+++ b/src/setjmp/i386/longjmp.s
@@ -6,10 +6,8 @@  _longjmp:
 longjmp:
 	mov  4(%esp),%edx
 	mov  8(%esp),%eax
-	test    %eax,%eax
-	jnz 1f
-	inc     %eax
-1:
+	cmp       $1,%eax
+	adc       $0, %al
 	mov   (%edx),%ebx
 	mov  4(%edx),%esi
 	mov  8(%edx),%edi
diff --git a/src/setjmp/x32/longjmp.s b/src/setjmp/x32/longjmp.s
index bb88afa1..1b2661c3 100644
--- a/src/setjmp/x32/longjmp.s
+++ b/src/setjmp/x32/longjmp.s
@@ -5,11 +5,9 @@ 
 .type longjmp,@function
 _longjmp:
 longjmp:
-	mov %esi,%eax           /* val will be longjmp return */
-	test %esi,%esi
-	jnz 1f
-	inc %eax                /* if val==0, val=1 per longjmp semantics */
-1:
+	xor %eax,%eax
+	cmp $1,%esi             /* CF = val ? 0 : 1 */
+	adc %esi,%eax           /* eax = val + !val */
 	mov (%rdi),%rbx         /* rdi is the jmp_buf, restore regs from it */
 	mov 8(%rdi),%rbp
 	mov 16(%rdi),%r12
diff --git a/src/setjmp/x86_64/longjmp.s b/src/setjmp/x86_64/longjmp.s
index bb88afa1..1b2661c3 100644
--- a/src/setjmp/x86_64/longjmp.s
+++ b/src/setjmp/x86_64/longjmp.s
@@ -5,11 +5,9 @@ 
 .type longjmp,@function
 _longjmp:
 longjmp:
-	mov %esi,%eax           /* val will be longjmp return */
-	test %esi,%esi
-	jnz 1f
-	inc %eax                /* if val==0, val=1 per longjmp semantics */
-1:
+	xor %eax,%eax
+	cmp $1,%esi             /* CF = val ? 0 : 1 */
+	adc %esi,%eax           /* eax = val + !val */
 	mov (%rdi),%rbx         /* rdi is the jmp_buf, restore regs from it */
 	mov 8(%rdi),%rbp
 	mov 16(%rdi),%r12