VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/compiler/vcc/x86-aulldvrm-core.mac@ 100594

Last change on this file since 100594 was 98509, checked in by vboxsync, 2 years ago

IPRT/vcc: Applied the unsigned 64-bit division optimizations to the div and rem variants too. bugref:10261

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.0 KB
Line 
1; $Id: x86-aulldvrm-core.mac 98509 2023-02-08 16:01:15Z vboxsync $
2;; @file
3; IPRT - Visual C++ Compiler - unsigned 64-bit division support, x86.
4;
5
6;
7; Copyright (C) 2023 Oracle and/or its affiliates.
8;
9; This file is part of VirtualBox base platform packages, as
10; available from https://www.virtualbox.org.
11;
12; This program is free software; you can redistribute it and/or
13; modify it under the terms of the GNU General Public License
14; as published by the Free Software Foundation, in version 3 of the
15; License.
16;
17; This program is distributed in the hope that it will be useful, but
18; WITHOUT ANY WARRANTY; without even the implied warranty of
19; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20; General Public License for more details.
21;
22; You should have received a copy of the GNU General Public License
23; along with this program; if not, see <https://www.gnu.org/licenses>.
24;
25; The contents of this file may alternatively be used under the terms
26; of the Common Development and Distribution License Version 1.0
27; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28; in the VirtualBox distribution, in which case the provisions of the
29; CDDL are applicable instead of those of the GPL.
30;
31; You may elect to license modified versions of this file under the
32; terms and conditions of either the GPL or the CDDL or both.
33;
34; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35;
36
37
38;*********************************************************************************************************************************
39;* Header Files *
40;*********************************************************************************************************************************
41%include "iprt/asmdefs.mac"
42
43
44;;
45; 64-bit unsigned division.
46;
47; @param 1 Function name.
48; @param 2 Variant selector: dvrm=0 (quotient and remainder), div=1 (quotient only), rem=2 (remainder only)
49;
50%macro rtVccUnsignedDivision 2
51
52;;
53; Division of unsigned 64-bit values; macro parameter 2 selects which result(s) the generated function returns.
54;
55; @returns %2 == 0: quotient in edx:eax, remainder in ebx:ecx. %2 == 1: quotient in edx:eax. %2 == 2: remainder in edx:eax.
56; @param [ebp+08h] Dividend (64-bit)
57; @param [ebp+10h] Divisor (64-bit)
58;
59; @note The remainder registers are swapped compared to Watcom's I8D and U8D.
60; @note The callee removes the 16 bytes of arguments (ret 10h). ebx is saved/restored unless %2 == 0, where it returns the high remainder.
61BEGINPROC_RAW %1
62%if 1
63 ; Assembly implementation; the %else branch further down (C fallback) is disabled by the '%if 1' above.
64 ; This code is duplicated in x86-aulldiv.asm and x86-aulldvrm.asm.
65 ; Apply fixes everywhere.
66 ;
67 push ebp
68 mov ebp, esp
69 %if %2 > 0
70 push ebx ; div and rem variants do not return anything in ebx, so preserve it.
71 %endif
72
73%define DIVIDEND_LO ebp + 08h
74%define DIVIDEND_HI ebp + 0ch
75%define DIVISOR_LO ebp + 10h
76%define DIVISOR_HI ebp + 14h
77
78 ;
79 ; If the divisor is only 32 bits wide, we can do a two-step division on 32-bit units.
80 ;
81 mov ebx, [DIVISOR_HI]
82 or ebx, ebx ; sets ZF if the high half of the divisor is zero.
83 jnz .full_64_bit_divisor
84
85 ; step 1: dividend_hi / divisor
86 mov ebx, [DIVISOR_LO]
87 mov eax, [DIVIDEND_HI]
88 xor edx, edx ; edx:eax = zero-extended dividend_hi
89 div ebx ; -> eax = high quotient, edx = remainder carried into step 2.
90 %if %2 <= 1
91 mov ecx, eax ; save the high quotient bits (only needed when a quotient is returned).
92 %endif
93
94 ; step 2: (step_1_remainder:dividend_lo) / divisor
95 mov eax, [DIVIDEND_LO] ; edx contains the remainder from the first step.
96 div ebx ; -> eax = low quotient, edx = remainder.
97
98 %if %2 <= 1
99 xchg edx, ecx ; ecx = (low) remainder, edx = saved high quotient from step 1
100 %if %2 == 0
101 xor ebx, ebx ; ebx = high remainder is zero, since divisor is 32-bit.
102 %endif
103 %else
104 mov eax, edx
105 xor edx, edx ; edx:eax = remainder (high half is zero, since divisor is 32-bit)
106 %endif
107
108 %if %2 > 0
109 pop ebx ; restore the ebx value saved in the prologue.
110 %endif
111 leave
112 ret 10h ; pop the two 64-bit arguments (16 bytes).
113
114 ;
115 ; The divisor is larger than 32 bits.
116 ;
117 ; We can approximate the quotient by reducing the divisor to 32 bits
118 ; (reducing the dividend accordingly) and perform a 32-bit division.
119 ; The result will be at most one off.
120 ;
121 ; The remainder has to be calculated using multiplication and
122 ; subtraction.
123 ;
124.full_64_bit_divisor:
125 push edi ; edi is used as scratch below and restored before returning.
126
127 ; Find the shift count needed to reduce the divisor to 32-bit.
128 bsr ecx, ebx ; ecx = index of the highest set bit in divisor_hi (ebx is non-zero here).
129 inc cl ; cl = number of significant bits in divisor_hi = shift count (1..32).
130 test cl, ~31 ; any bit above the low five set, i.e. is the shift count 32?
131 jnz .shift_32
132
133 ; Shift the divisor into edi.
134 mov edi, [DIVISOR_LO]
135 shrd edi, ebx, cl ; edi = reduced divisor
136
137 ; Shift the dividend into edx:eax.
138 mov eax, [DIVIDEND_LO]
139 mov edx, [DIVIDEND_HI]
140 shrd eax, edx, cl
141 shr edx, cl ; edx:eax = dividend >> cl
142 jmp .shifted
143
144.shift_32: ; shift count is 32: simply use the high dwords.
145 mov edi, ebx ; edi = divisor_hi = reduced divisor
146 mov eax, [DIVIDEND_HI]
147 xor edx, edx ; edx:eax = dividend >> 32
148.shifted:
149
150 ; Divide and save the approximate quotient (Qapprox) in edi.
151 div edi
152 mov edi, eax ; edi = Qapprox
153
154 ; Now multiply Qapprox with the divisor.
155 mul dword [DIVISOR_HI] ; eax = low dword of Qapprox * divisor_hi (edx is overwritten below).
156 mov ecx, eax ; temporary storage
157 mov eax, [DIVISOR_LO]
158 mul edi ; edx:eax = divisor_lo * Qapprox
159 add edx, ecx ; edx:eax = QapproxDividend = Qapprox * divisor; CF is set if this overflowed 64 bits.
160
161 ; Preload the dividend into ebx:ecx for remainder calculation and for adjusting Qapprox (mov leaves CF from the add intact).
162 mov ecx, [DIVIDEND_LO]
163 mov ebx, [DIVIDEND_HI]
164
165 ; If carry is set, the result overflowed 64 bits, so the quotient must be too large.
166 jc .quotient_is_one_above_and_calc_remainder
167
168 ; Calculate the remainder, if this overflows (CF) it means Qapprox is
169 ; one above and we need to reduce it and then adjust the remainder.
170 sub ecx, eax
171 sbb ebx, edx ; ebx:ecx = dividend - Qapprox * divisor
172 jc .quotient_is_one_above
173.done:
174 %if %2 <= 1
175 mov eax, edi
176 xor edx, edx ; edx:eax = quotient (edi holds all of it; the high dword is zero)
177 %else
178 mov eax, ecx
179 mov edx, ebx ; edx:eax = remainder
180 %endif
181
182 pop edi
183 %if %2 > 0
184 pop ebx ; restore the ebx value saved in the prologue.
185 %endif
186 leave
187 ret 10h ; pop the two 64-bit arguments (16 bytes).
188
189.quotient_is_one_above_and_calc_remainder: ; reached when Qapprox * divisor overflowed, before the subtraction was done.
190 %if %2 != 1
191 sub ecx, eax
192 sbb ebx, edx ; ebx:ecx = dividend - Qapprox * divisor (wrapped; corrected by the add below)
193 %endif
194.quotient_is_one_above:
195 %if %2 != 1
196 add ecx, [DIVISOR_LO]
197 adc ebx, [DIVISOR_HI] ; add the divisor back: ebx:ecx = remainder for Qapprox - 1 (skipped when only the quotient is returned).
198 %endif
199 %if %2 != 2
200 dec edi ; correct the quotient (skipped when only the remainder is returned).
201 %endif
202 jmp .done
203
204%else
205 ;
206 ; Fall back on a rather slow C implementation (not assembled while the '%if 1' above is in effect).
207 ;
208 push ebp
209 mov ebp, esp
210
211 ; Call RTVccUInt64Div(RTUINT64U const *paDividendDivisor, RTUINT64U *paQuotientRemainder)
212 sub esp, 10h ; space for quotient and remainder.
213 mov edx, esp ; edx = address of the 16 byte result buffer just reserved.
214 push edx
215 lea ecx, [ebp + 8] ; ecx = address of the incoming arguments (dividend followed by divisor).
216 push ecx
217 extern NAME(RTVccUInt64Div)
218 call NAME(RTVccUInt64Div) ; NOTE(review): the two pushed arguments are not popped here; 'leave' below resets esp regardless.
219
220 ; Load the result.
221 %if %2 != 2
222 mov eax, [ebp - 10h]
223 mov edx, [ebp - 10h + 4] ; edx:eax = first result entry (quotient)
224
225 %if %2 != 1
226 mov ecx, [ebp - 08h]
227 mov ebx, [ebp - 08h + 4] ; ebx:ecx = second result entry (remainder)
228 %endif
229 %else
230 mov eax, [ebp - 08h]
231 mov edx, [ebp - 08h + 4] ; edx:eax = second result entry (remainder)
232 %endif
233 leave
234 ret 10h ; pop the two 64-bit arguments (16 bytes).
235%endif
236ENDPROC_RAW %1
237%endm
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette