More additions for the math/i386 subtree

Submitted by Stefan Kanthak on Dec. 10, 2019, 9:48 p.m.

Details

Message ID 2DBF29D377CC4516BCB01E382C0EEF31@H270
State New
Series "More additions for the math/i386 subtree"
Headers show

Commit Message

Stefan Kanthak Dec. 10, 2019, 9:48 p.m.
Optimised branch-free implementations of fmax() and fmin() for i386

JFTR: I'm NOT subscribed to your mailing list, so CC: me in replies!

Patch hide | download patch | download mbox

--- -/dev/null
+++ +/src/math/i386/fmax.S
@@ -0,0 +1,36 @@
+/*
+ * float       fmaxf(float x, float y)
+ * double      fmax(double x, double y)
+ * long double fmaxl(long double x, long double y)
+ *
+ * i386 cdecl: both arguments are loaded from the stack, the result is
+ * returned in %st(0).  If one argument is a NaN the other one is returned.
+ * Uses fucomi/fcmovcc, i.e. needs a P6-class (Pentium Pro) or later CPU.
+ */
+.global fmaxf
+.type fmaxf,@function
+fmaxf:
+        flds 4(%esp)            /* x */
+        flds 8(%esp)            /* y */
+        jmp 1f
+
+.global fmaxl
+.type fmaxl,@function
+fmaxl:
+        fldt 4(%esp)            /* x */
+        fldt 16(%esp)           /* y (x occupies a 12-byte stack slot) */
+        jmp 1f
+
+.global fmax
+.type fmax,@function
+fmax:
+        fldl 4(%esp)            /* x */
+        fldl 12(%esp)           /* y */
+        /* all entry points arrive here with st(0) = y, st(1) = x */
+1:      fucomi %st(0),%st(0)    /* self-compare: PF=1 iff y is a NaN */
+        fcmovu %st(1),%st(0)    /* y is a NaN: replace it by x */
+        fxch %st(1)             /* st(0) = x, st(1) = y (or x if y was NaN) */
+        fucomi %st(1),%st(0)    /* CF=1 if x < st(1) or if x is a NaN */
+        fcmovb %st(1),%st(0)    /* then st(1) is the result, else keep x */
+        fstp %st(1)             /* pop the spare slot, result stays in st(0) */
+        ret

--- -/dev/null
+++ +/src/math/i386/fmin.S
@@ -0,0 +1,35 @@
+/*
+ * float       fminf(float x, float y)
+ * double      fmin(double x, double y)
+ * long double fminl(long double x, long double y)
+ *
+ * i386 cdecl: both arguments are loaded from the stack, the result is
+ * returned in %st(0).  If one argument is a NaN the other one is returned.
+ * Uses fucomi/fcmovcc, i.e. needs a P6-class (Pentium Pro) or later CPU.
+ */
+.global fminf
+.type fminf,@function
+fminf:
+        flds 4(%esp)            /* x */
+        flds 8(%esp)            /* y */
+        jmp 1f
+
+.global fminl
+.type fminl,@function
+fminl:
+        fldt 4(%esp)            /* x */
+        fldt 16(%esp)           /* y (x occupies a 12-byte stack slot) */
+        jmp 1f
+
+.global fmin
+.type fmin,@function
+fmin:
+        fldl 4(%esp)            /* x */
+        fldl 12(%esp)           /* y */
+        /* all entry points arrive here with st(0) = y, st(1) = x */
+1:      fucomi %st(0),%st(0)    /* self-compare: PF=1 iff y is a NaN */
+        fcmovu %st(1),%st(0)    /* y is a NaN: replace it by x */
+        fucomi %st(1),%st(0)    /* CF=ZF=0 iff st(0) > x; unordered sets both */
+        fcmovnbe %st(1),%st(0)  /* take x only if it is smaller; never a NaN x */
+        fstp %st(1)             /* pop the spare slot, result stays in st(0) */
+        ret