VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 46957

Last change on this file since 46957 was 46957, checked in by vboxsync, 11 years ago

check for DIV overflow, fixing DIV/0 checks. IDIV overflow/underflow is still on the todo.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 69.8 KB
Line 
1; $Id: IEMAllAImpl.asm 46957 2013-07-04 02:05:41Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6; Copyright (C) 2011-2012 Oracle Corporation
7;
8; This file is part of VirtualBox Open Source Edition (OSE), as
9; available from http://www.virtualbox.org. This file is free software;
10; you can redistribute it and/or modify it under the terms of the GNU
11; General Public License (GPL) as published by the Free Software
12; Foundation, in version 2 as it comes in the "COPYING" file of the
13; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15;
16
17
18;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19; Header Files ;
20;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21%include "VBox/asmdefs.mac"
22%include "VBox/err.mac"
23%include "iprt/x86.mac"
24
25
26;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27; Defined Constants And Macros ;
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
;;
; RET wrapper for fastcall functions.
;
; The byte count is only handed to RET when assembling for 32-bit x86 on
; Windows; every other configuration emits a plain RET.
;
; @param    1       The byte count to pass to RET (32-bit Windows only).
;
%macro RET_FASTCALL 1
 %ifndef RT_ARCH_X86
        ret
 %elifndef RT_OS_WINDOWS
        ret
 %else
        ret     %1
 %endif
%endmacro
44
;;
; NAME for fastcall functions.
;
; On 32-bit Windows the symbol is built as <prefix><name>@<cbArgs>; everywhere
; else it is simply NAME(<name>).
;
;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
;        escaping (or whatever the dollar is good for here).  Thus the ugly
;        prefix argument.
;
%ifdef RT_ARCH_X86
 %ifdef RT_OS_WINDOWS
  %undef  NAME_FASTCALL
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
 %else
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
 %endif
%else
 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
%endif
59
;;
; BEGINPROC for fastcall functions.
;
; Emits the export/global directives appropriate for the object format and
; then the entry label itself.
;
; @param    1       The function name (C).
; @param    2       The argument size on x86.
;
%macro BEGINPROC_FASTCALL 2
 ; PE output: export under the plain C name.
 %ifdef ASM_FORMAT_PE
  export %1=NAME_FASTCALL(%1,%2,$@)
 %endif

 ; OMF output, NASM only.
 %ifdef __NASM__
  %ifdef ASM_FORMAT_OMF
   export NAME(%1) NAME_FASTCALL(%1,%2,$@)
  %endif
 %endif

 ; Everything except flat binary gets a global symbol.
 %ifndef ASM_FORMAT_BIN
  global NAME_FASTCALL(%1,%2,$@)
 %endif

NAME_FASTCALL(%1,%2,@):
%endmacro
80
81
82;
83; We employ some macro assembly here to hide the calling convention differences.
84;
85%ifdef RT_ARCH_AMD64
; AMD64 prologue/epilogue macros.
;
; The prologues emit nothing and every epilogue is a bare RET.  The _EX
; variants accept the stack byte count that the 32-bit versions need and
; ignore it, so the same invocation assembles for both architectures.
;
 %macro PROLOGUE_1_ARGS 0
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        ret
 %endmacro
 ; Accepts zero or one parameter: the 32-bit variant and the 2/3/4 argument
 ; siblings all take the byte count, while the parameterless form is kept for
 ; compatibility with any existing invocation.
 %macro EPILOGUE_1_ARGS_EX 0-1
        ret
 %endmacro

 %macro PROLOGUE_2_ARGS 0
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_3_ARGS 0
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_4_ARGS 0
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
        ret
 %endmacro
121
; Argument register aliases (A0..A3) for the two 64-bit calling conventions,
; followed by the temporary registers (T0, T1) shared by both.
;
 %ifdef ASM_CALL64_GCC
  ; GCC convention: rdi, rsi, rdx, rcx.
  %define A0        rdi
  %define A0_32     edi
  %define A0_16     di
  %define A0_8      dil

  %define A1        rsi
  %define A1_32     esi
  %define A1_16     si
  %define A1_8      sil

  %define A2        rdx
  %define A2_32     edx
  %define A2_16     dx
  %define A2_8      dl

  %define A3        rcx
  %define A3_32     ecx
  %define A3_16     cx
 %endif

 %ifdef ASM_CALL64_MSC
  ; MSC convention: rcx, rdx, r8, r9.
  %define A0        rcx
  %define A0_32     ecx
  %define A0_16     cx
  %define A0_8      cl

  %define A1        rdx
  %define A1_32     edx
  %define A1_16     dx
  %define A1_8      dl

  %define A2        r8
  %define A2_32     r8d
  %define A2_16     r8w
  %define A2_8      r8b

  %define A3        r9
  %define A3_32     r9d
  %define A3_16     r9w
 %endif

 ; Temporaries.
 %define T0         rax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         r11
 %define T1_32      r11d
 %define T1_16      r11w
 %define T1_8       r11b

173
174%else
175 ; x86
; 32-bit prologue/epilogue macros.
;
; Every prologue saves edi (which is T1 here).  The 3 and 4 argument
; prologues additionally save ebx (and esi) and load the stack arguments into
; them, so that A2 (ebx) and A3 (esi) are valid afterwards.
;

 ;
 ; One argument.
 ;
 %macro PROLOGUE_1_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_1_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 ;
 ; Two arguments.
 ;
 %macro PROLOGUE_2_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 ;
 ; Three arguments, the third is fetched from the stack into ebx.
 ;
 %macro PROLOGUE_3_ARGS 0
        push    ebx
        mov     ebx, [esp + 4 + 4]      ; skips the saved ebx and the return address
        push    edi
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        EPILOGUE_3_ARGS_EX 4
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
  %if (%1) < 4
   %error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     edi
        pop     ebx
        ret     %1
 %endmacro

 ;
 ; Four arguments, the third and fourth are fetched from the stack into
 ; ebx and esi.
 ;
 %macro PROLOGUE_4_ARGS 0
        push    ebx
        push    edi
        push    esi
        mov     ebx, [esp + 12 + 4 + 0] ; 12 = the three pushes, 4 = return address
        mov     esi, [esp + 12 + 4 + 4]
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        EPILOGUE_4_ARGS_EX 8
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
  %if (%1) < 8
   %error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     esi
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
236
; 32-bit argument (A0..A3) and temporary (T0, T1) register aliases.
; A0 and A1 are ecx and edx; A2 and A3 are the registers the 3/4 argument
; prologues load from the stack.  No 8-bit alias is defined for A3 or T1.
;
 %define A0         ecx
 %define A0_32      ecx
 %define A0_16      cx
 %define A0_8       cl

 %define A1         edx
 %define A1_32      edx
 %define A1_16      dx
 %define A1_8       dl

 %define A2         ebx
 %define A2_32      ebx
 %define A2_16      bx
 %define A2_8       bl

 %define A3         esi
 %define A3_32      esi
 %define A3_16      si

 %define T0         eax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         edi
 %define T1_32      edi
 %define T1_16      di
264%endif
265
266
;;
; Loads the modified (%2) and undefined (%3) guest flag bits from [%1] into
; the host EFLAGS, leaving all other host flag bits as they were.
;
; The load is currently unconditional: the '%if (%3) != 0' guard is commented
; out.
;
; @remarks  Clobbers T0, stack.  Changes EFLAGS.
; @param    1       The register (A0..A3) pointing to the guest eflags.
; @param    2       The set of modified flags.
; @param    3       The set of undefined flags.
;
%macro IEM_MAYBE_LOAD_FLAGS 3
 ;%if (%3) != 0
        pushf                                   ; host flags onto the stack
        mov     T0_32, [%1]                     ; T0 = guest flags
        and     T0_32, (%2 | %3)                ; guest: keep only the modified + undefined bits
        and     dword [xSP], ~(%2 | %3)         ; host:  drop exactly those bits
        or      [xSP], T0                       ; merge the two
        popf                                    ; and make the mix the current flags
 ;%endif
%endmacro
286
;;
; Writes the modified (%2) and undefined (%3) bits of the current host EFLAGS
; back into the guest eflags at [%1].  All other guest bits are preserved.
; Emits nothing when both masks are zero.
;
; @remarks  Clobbers T0, T1, stack.
; @param    1       The register pointing to the EFLAGS.
; @param    2       The mask of modified flags to save.
; @param    3       The mask of undefined flags to (maybe) save.
;
%macro IEM_SAVE_FLAGS 3
 %if (%2 | %3) != 0
        pushf                                   ; must come first, the ANDs below change the flags
        pop     T1
        and     T1_32, (%2 | %3)                ; host: the bits being saved
        mov     T0_32, [%1]                     ; guest flags
        and     T0_32, ~(%2 | %3)               ; guest: make room for them
        or      T0_32, T1_32
        mov     [%1], T0_32
 %endif
%endmacro
306
307
;;
; Macro for implementing a binary operator.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
; variants, except on 32-bit system where the 64-bit accesses requires hand
; coding (they are INT3 stubs for now).
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIN_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8                       ; same stack cleanup as the _u64_locked stub below
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; !RT_ARCH_AMD64
 %endif ; locked
%endmacro
406
;            instr, lock, modified-flags, undefined-flags.
; Note: exactly four arguments each - no trailing comma, which would hand the
;       four parameter macro an extra empty argument.
IEMIMPL_BIN_OP add,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP adc,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sub,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sbb,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP or,   1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP xor,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP and,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP cmp,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
417
418
;;
; Macro for implementing a bit operator (the four parameter IEMIMPL_BIT_OP).
;
; Generates the 16, 32 and 64 bit workers and, on request, their locked
; counterparts.  On 32-bit hosts the 64-bit workers are INT3 stubs that still
; need hand coding.
;
; Each worker gets a pointer to the destination memory operand in A0, the
; source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIT_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifndef RT_ARCH_AMD64
        ; 32-bit host: stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifndef RT_ARCH_AMD64
        ; 32-bit host: stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %else
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro
IEMIMPL_BIT_OP bt,  0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
505
;;
; Macro for implementing a bit search operator.
;
; This is the three parameter form of IEMIMPL_BIT_OP; the parameter count is
; what distinguishes it from the four parameter bit operator macro.
;
; Generates the 16, 32 and 64 bit workers.  On 32-bit hosts the 64-bit worker
; is an INT3 stub that still needs hand coding.
;
; Each worker gets a pointer to the destination memory operand in A0, the
; source register operand in A1 and a pointer to eflags in A2.  The
; destination is only written when the instruction leaves ZF clear.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_BIT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_16, A1_16
        jz      .unchanged_dst          ; ZF set: skip the store
        mov     [A0], T0_16
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_32, A1_32
        jz      .unchanged_dst          ; ZF set: skip the store
        mov     [A0], T0_32
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifndef RT_ARCH_AMD64
        ; 32-bit host: stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        int3
        ret     8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0, A1
        jz      .unchanged_dst          ; ZF set: skip the store
        mov     [A0], T0
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64
%endmacro
IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
563
564
565;
566; IMUL is also a similar but yet different case (no lock, no mem dst).
567; The rDX:rAX variant of imul is handled together with mul further down.
568;
569BEGINCODE
; 16-bit two operand IMUL: [A0] = [A0] * A1_16.
; A0 = pointer to the destination, A1 = the other factor, A2 = pointer to eflags.
BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_16, word [A0]
        mov     [A0], A1_16             ; the product goes back to the destination
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u16
578
; 32-bit two operand IMUL: [A0] = [A0] * A1_32.
; A0 = pointer to the destination, A1 = the other factor, A2 = pointer to eflags.
BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_32, dword [A0]
        mov     [A0], A1_32             ; the product goes back to the destination
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u32
587
; 64-bit two operand IMUL: [A0] = [A0] * A1.
; Only implemented on AMD64; on 32-bit hosts the body is an INT3.
BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
        PROLOGUE_3_ARGS
%ifndef RT_ARCH_AMD64
        int3 ;; @todo implement me
%else
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1, qword [A0]
        mov     [A0], A1                ; the product goes back to the destination
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
%endif
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_imul_two_u64
600
601
602;
603; XCHG for memory operands. This implies locking. No flag changes.
604;
605; Each function takes two arguments, first the pointer to the memory,
606; then the pointer to the register. They all return void.
607;
608BEGINCODE
; Swaps the byte at [A0] (memory) with the byte at [A1] (register value).
BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
        PROLOGUE_2_ARGS
        mov     T0_8, [A1]              ; fetch the register value
        xchg    [A0], T0_8              ; swap it with memory
        mov     [A1], T0_8              ; old memory value -> register
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u8
616
; Swaps the word at [A0] (memory) with the word at [A1] (register value).
BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]             ; fetch the register value
        xchg    [A0], T0_16             ; swap it with memory
        mov     [A1], T0_16             ; old memory value -> register
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u16
624
; Swaps the dword at [A0] (memory) with the dword at [A1] (register value).
BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]             ; fetch the register value
        xchg    [A0], T0_32             ; swap it with memory
        mov     [A1], T0_32             ; old memory value -> register
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u32
632
; Swaps the qword at [A0] (memory) with the qword at [A1] (register value).
; Only implemented on AMD64; on 32-bit hosts this is an INT3 stub.
BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
%ifndef RT_ARCH_AMD64
        int3
        ret     0
%else
        PROLOGUE_2_ARGS
        mov     T0, [A1]                ; fetch the register value
        xchg    [A0], T0                ; swap it with memory
        mov     [A1], T0                ; old memory value -> register
        EPILOGUE_2_ARGS
%endif
ENDPROC iemAImpl_xchg_u64
645
646
647;
648; XADD for memory operands.
649;
650; Each function takes three arguments, first the pointer to the
651; memory/register, then the pointer to the register, and finally a pointer to
652; eflags. They all return void.
653;
654BEGINCODE
; 8-bit XADD.  A0 = pointer to the destination, A1 = pointer to the register
; value, A2 = pointer to eflags.
BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]              ; loaded after the flags macro, which clobbers T0
        xadd    [A0], T0_8
        mov     [A1], T0_8              ; T0 as left by XADD goes back to the register
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8
664
; 16-bit XADD.  A0 = pointer to the destination, A1 = pointer to the register
; value, A2 = pointer to eflags.
BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]             ; loaded after the flags macro, which clobbers T0
        xadd    [A0], T0_16
        mov     [A1], T0_16             ; T0 as left by XADD goes back to the register
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16
674
; 32-bit XADD.  A0 = pointer to the destination, A1 = pointer to the register
; value, A2 = pointer to eflags.
BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]             ; loaded after the flags macro, which clobbers T0
        xadd    [A0], T0_32
        mov     [A1], T0_32             ; T0 as left by XADD goes back to the register
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32
684
; 64-bit XADD.  A0 = pointer to the destination, A1 = pointer to the register
; value, A2 = pointer to eflags.
; Only implemented on AMD64; on 32-bit hosts this is an INT3 stub.
BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
%ifndef RT_ARCH_AMD64
        int3
        ret     4
%else
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]                ; loaded after the flags macro, which clobbers T0
        xadd    [A0], T0
        mov     [A1], T0                ; T0 as left by XADD goes back to the register
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
%endif
ENDPROC iemAImpl_xadd_u64
699
; 8-bit XADD with LOCK prefix.  A0 = pointer to the destination, A1 = pointer
; to the register value, A2 = pointer to eflags.
BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]              ; loaded after the flags macro, which clobbers T0
        lock xadd [A0], T0_8
        mov     [A1], T0_8              ; T0 as left by XADD goes back to the register
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8_locked
709
; 16-bit XADD with LOCK prefix.  A0 = pointer to the destination, A1 = pointer
; to the register value, A2 = pointer to eflags.
BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]             ; loaded after the flags macro, which clobbers T0
        lock xadd [A0], T0_16
        mov     [A1], T0_16             ; T0 as left by XADD goes back to the register
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16_locked
719
; 32-bit XADD with LOCK prefix.  A0 = pointer to the destination, A1 = pointer
; to the register value, A2 = pointer to eflags.
BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]             ; loaded after the flags macro, which clobbers T0
        lock xadd [A0], T0_32
        mov     [A1], T0_32             ; T0 as left by XADD goes back to the register
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32_locked
729
; 64-bit XADD with LOCK prefix.  A0 = pointer to the destination, A1 = pointer
; to the register value, A2 = pointer to eflags.
; Only implemented on AMD64; on 32-bit hosts this is an INT3 stub.
BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
%ifndef RT_ARCH_AMD64
        int3
        ret     4
%else
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]                ; loaded after the flags macro, which clobbers T0
        lock xadd [A0], T0
        mov     [A1], T0                ; T0 as left by XADD goes back to the register
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
%endif
ENDPROC iemAImpl_xadd_u64_locked
744
745
746;
747; CMPXCHG8B.
748;
749; These are tricky register wise, so the code is duplicated for each calling
750; convention.
751;
752; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
753;
754; C-proto:
755; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
756; uint32_t *pEFlags));
757;
758BEGINCODE
; CMPXCHG8B worker, hand written per calling convention because the
; instruction dictates eax, ebx, ecx and edx.  The instruction is always
; issued with a LOCK prefix.  Only ZF is loaded from and saved to the guest
; eflags.
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_MSC
        ;
        ; MSC: rcx=pu64Dst, rdx=pu64EaxEdx, r8=pu64EbxEcx, r9=pEFlags.
        ;
        push    rbx

        mov     r11, rdx                ; pu64EaxEdx (is also T1)
        mov     r10, rcx                ; pu64Dst

        mov     ebx, [r8]               ; ecx:ebx = the replacement value
        mov     ecx, [r8 + 4]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [r11]              ; edx:eax = the comparand
        mov     edx, [r11 + 4]

        lock cmpxchg8b [r10]

        mov     [r11], eax              ; write edx:eax back
        mov     [r11 + 4], edx
        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        ;
        ; GCC: rdi=pu64Dst, rsi=pu64EaxEdx, rdx=pu64EbxEcx, rcx=pEFlags.
        ;
        push    rbx

        mov     r10, rcx                ; pEFlags
        mov     r11, rdx                ; pu64EbxEcx (is also T1)

        mov     ebx, [r11]              ; ecx:ebx = the replacement value
        mov     ecx, [r11 + 4]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [rsi]              ; edx:eax = the comparand
        mov     edx, [rsi + 4]

        lock cmpxchg8b [rdi]

        mov     [rsi], eax              ; write edx:eax back
        mov     [rsi + 4], edx
        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
%else
        ;
        ; x86: ecx=pu64Dst, edx=pu64EaxEdx, the other two are on the stack.
        ;
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64EaxEdx
        mov     ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]              ; ecx:ebx = the replacement value
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0  ; clobbers T0 (eax)
        mov     eax, [esi]              ; edx:eax = the comparand
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        mov     [esi], eax              ; write edx:eax back
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8
%endif
ENDPROC iemAImpl_cmpxchg8b
833
; Locked CMPXCHG8B worker: the plain worker already uses a LOCK prefix, so
; this simply jumps to it.
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
        ; Lazy bird always lock prefixes cmpxchg8b.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
ENDPROC iemAImpl_cmpxchg8b_locked
838
839
840
;
; CMPXCHG.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t *puEax, uintX_t uReg, uint32_t *pEFlags));
;
; The accumulator is passed by reference (A1): it is loaded before the
; instruction and written back afterwards.  On 32-bit hosts the 64-bit worker
; receives the register operand by reference as well and is built on
; cmpxchg8b.
;
; @param        1       The instruction prefix (empty or lock).
; @param        2       The function name suffix (empty or _locked).
;
BEGINCODE
%macro IEMIMPL_CMPXCHG 2
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     al, [A1]
        %1 cmpxchg [A0], A2_8
        mov     [A1], al
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u8 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     ax, [A1]
        %1 cmpxchg [A0], A2_16
        mov     [A1], ax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u16 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [A1]
        %1 cmpxchg [A0], A2_32
        mov     [A1], eax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u32 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
%ifdef RT_ARCH_AMD64
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     rax, [A1]               ; the whole 64-bit accumulator, the compare uses all of rax
        %1 cmpxchg [A0], A2
        mov     [A1], rax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
%else
        ;
        ; Must use cmpxchg8b here.  See also iemAImpl_cmpxchg8b.
        ;
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64Rax
        mov     ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0  ; clobbers T0 (eax)
        mov     eax, [esi]
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
        ; ZF is clear when the compare failed; in that case edx:eax now holds
        ; the destination value while [esi] still holds the old accumulator.
        jnz     .cmpxchg8b_not_equal
        cmp     eax, eax                ; equal: just set the other flags.
.store:
        mov     [esi], eax
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8

.cmpxchg8b_not_equal:
        cmp     [esi + 4], edx  ;; @todo FIXME - verify 64-bit compare implementation
        jne     .store
        cmp     [esi], eax
        jmp     .store

%endif
ENDPROC iemAImpl_cmpxchg_u64 %+ %2
%endmacro ; IEMIMPL_CMPXCHG

IEMIMPL_CMPXCHG , ,
IEMIMPL_CMPXCHG lock, _locked
938
;;
; Macro for implementing a unary operator.
;
; Generates plain and locked workers for the 8, 16, 32 and 64 bit accesses.
; On 32-bit hosts the two 64-bit workers are INT3 stubs that still need hand
; coding.
;
; Each worker gets a pointer to the destination memory operand in A0 and a
; pointer to eflags in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_UNARY_OP 3
BEGINCODE
        ;
        ; 8-bit.
        ;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

        ;
        ; 16-bit.
        ;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

        ;
        ; 32-bit.
        ;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

        ;
        ; 64-bit.
        ;
 %ifndef RT_ARCH_AMD64
        ; stub them for now.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        int3
        ret     0
ENDPROC iemAImpl_ %+ %1 %+ _u64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        int3
        ret     0
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %else
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %endif

%endmacro

IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_UNARY_OP not, 0, 0
1037
1038
1039
;;
; Macro for implementing a shift operation.
;
; Generates the 8, 16, 32 and 64 bit workers.  On 32-bit hosts the 64-bit
; worker is an INT3 stub that still needs hand coding.
;
; Each worker gets a pointer to the destination memory operand in A0, the
; shift count in A1 and a pointer to eflags in A2.
;
; The count has to end up in cl.  With the GCC convention it is copied there
; from A1.  Otherwise A0 is rcx/ecx, so A0 and A1 are swapped, which puts the
; count in cl and the destination pointer in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
;
%macro IEMIMPL_SHIFT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      byte [A0], cl
 %else
        xchg    A1, A0                  ; count -> cl, pointer -> A1
        %1      byte [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      word [A0], cl
 %else
        xchg    A1, A0                  ; count -> cl, pointer -> A1
        %1      word [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      dword [A0], cl
 %else
        xchg    A1, A0                  ; count -> cl, pointer -> A1
        %1      dword [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifndef RT_ARCH_AMD64
        ; 32-bit host: stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        int3
        ret     4
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      qword [A0], cl
  %else
        xchg    A1, A0                  ; count -> cl, pointer -> A1
        %1      qword [A1], cl
  %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1129
1130
;;
; Generates the workers for a double precision shift instruction (count in CL).
;
; Emits iemAImpl_<mnemonic>_u16, _u32 and _u64.  On 32-bit hosts the 64-bit
; worker is only an int3 stub because it needs hand coding.
;
; Each worker takes the destination operand (r/m) pointer in A0, the source
; (reg) value in A1, the shift count in A2 and a pointer to the eflags
; variable/register in A3.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments: the register that is
; exchanged with A2 below (A3 for 64-bit GCC, A0 otherwise) is used as CL
; right afterwards, i.e. it has to be xCX.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE
;
; 16-bit worker.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> CL, eflags pointer parked in A2
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; put the eflags pointer back into A3
 %else
        xchg    A0, A2                  ; count -> CL, operand pointer -> A2
        %1      [A2], A1_16, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

;
; 32-bit worker.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1_32, cl
        xchg    A3, A2
 %else
        xchg    A0, A2
        %1      [A2], A1_32, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

;
; 64-bit worker (real implementation on AMD64 only).
;
 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1, cl
        xchg    A3, A2
  %else
        xchg    A0, A2
        %1      [A2], A1, cl
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1204
1205
;;
; Generates the workers for a widening multiplication instruction.
;
; Emits iemAImpl_<mnemonic>_u8, _u16, _u32 and _u64.  On 32-bit hosts the
; 64-bit worker is only an int3 stub because it needs hand coding.
;
; The 8-bit worker only operates on AX, so it takes no DX pointer: A0 points
; to AX, A1 holds the operand and A2 points to eflags.  The other workers take
; a pointer to rAX in A0, a pointer to rDX in A1, the operand in A2 and a
; pointer to eflags in A3.
;
; All workers return 0, so callers can treat mul/imul and div/idiv workers
; uniformly.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.  Outside the
; 64-bit GCC convention the rDX pointer (A1) is copied to T1 before the
; instruction executes, since the instruction writes xDX.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE
;
; 8-bit worker: AX = AL * operand.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; the full 16-bit product goes back to AX
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

;
; 16-bit worker: DX:AX = AX * operand.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifndef ASM_CALL64_GCC
        mov     T1, A1                  ; keep the rDX pointer out of harm's way
 %endif
        %1      A2_16
        mov     [A0], ax
 %ifdef ASM_CALL64_GCC
        mov     [A1], dx
 %else
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

;
; 32-bit worker: EDX:EAX = EAX * operand.
;
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifndef ASM_CALL64_GCC
        mov     T1, A1
 %endif
        %1      A2_32
        mov     [A0], eax
 %ifdef ASM_CALL64_GCC
        mov     [A1], edx
 %else
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

;
; 64-bit worker: RDX:RAX = RAX * operand (real implementation on AMD64 only).
;
 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
  %ifndef ASM_CALL64_GCC
        mov     T1, A1
  %endif
        %1      A2
        mov     [A0], rax
  %ifdef ASM_CALL64_GCC
        mov     [A1], rdx
  %else
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1306
1307
;;
; Macro for implementing a division operations.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
; 32-bit system where the 64-bit accesses requires hand coding.
;
; The 8-bit function only operates on AX, so it takes no DX pointer.  The other
; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
; pointer to eflags in A3.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  When -1 is returned neither the registers nor eflags
; have been modified.
;
; Checks performed before the instruction is executed on the host:
;   - Division by zero: all sizes, signed and unsigned.
;   - Unsigned overflow: all sizes (high half of the dividend >= divisor).
;   - Signed overflow: 8-bit only, and only when dividend and divisor have the
;     same sign.
; @todo idiv overflow checking for mixed signs (8-bit) and for 16/32/64-bit.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
; @param        4       1 if signed, 0 if unsigned.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_DIV_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; Division by zero check.
        test    A1_8, A1_8
        jz      .div_zero

 %if %4 == 0
        ; Unsigned: the quotient fits into AL only if AH < divisor.  The high
        ; byte is compared straight from memory because AH cannot be encoded
        ; together with a register that needs a REX prefix, which A1_8 may be.
        cmp     [A0 + 1], A1_8
        jae     .div_overflow
 %else
        ; Load the dividend before looking at any sign.  'mov' and 'jz' leave
        ; the flags alone, so SF still reflects the divisor test above.
        mov     ax, [A0]
        js      .divisor_negative

        ; Positive divisor.
        test    ax, ax
        js      .div_no_overflow        ; @todo negative dividend / positive divisor is unchecked.

        ; Both positive: quotient <= 127 iff (dividend >> 7) < divisor.
        shl     ax, 1                   ; ah = dividend >> 7 (dividend < 8000h, nothing lost)
        mov     al, A1_8
        cmp     ah, al
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        test    ax, ax
        jns     .div_no_overflow        ; @todo positive dividend / negative divisor is unchecked.

        ; Both negative: same check as above on the absolute values.
        neg     ax
        shl     ax, 1                   ; ah = |dividend| >> 7
        jc      .div_overflow           ; dividend was -32768, quotient would be >= 256
        mov     al, A1_8
        neg     al                      ; al = |divisor| (80h for -128, compared unsigned)
        cmp     ah, al
        jae     .div_overflow
.div_no_overflow:
 %endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; success

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; Division by zero check.
        test    A2_16, A2_16
        jz      .div_zero
 %if %4 == 0
        ; Unsigned overflow: DX >= divisor.
        cmp     [A1], A2_16
        jae     .div_overflow
 %else
        ;; @todo idiv overflow checking.
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; move the divisor aside before dx is loaded (A2 presumably is xDX here)
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; move the rDX pointer aside before dx is loaded (A1 presumably is xDX here)
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; success

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; Division by zero check.
        test    A2_32, A2_32
        jz      .div_zero
 %if %4 == 0
        ; Unsigned overflow: EDX >= divisor.
        cmp     [A1], A2_32
        jae     .div_overflow
 %else
        ;; @todo idiv overflow checking.
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; move the divisor aside before edx is loaded
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; move the rDX pointer aside before edx is loaded
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; success

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        ; Division by zero check.
        test    A2, A2
        jz      .div_zero
  %if %4 == 0
        ; Unsigned overflow: RDX >= divisor.
        cmp     [A1], A2
        jae     .div_overflow
  %else
        ;; @todo idiv overflow checking.
  %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; move the divisor aside before rdx is loaded
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; move the rDX pointer aside before rdx is loaded
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; success

.return:
        EPILOGUE_4_ARGS_EX 12

.div_zero:
.div_overflow:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12                      ; same stack cleanup as the other 20 byte stubs and EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1519
1520
;
; BSWAP.  None of the workers touch the flags.
;
; Every worker takes a single argument in A0: a pointer to the value that is
; byte swapped in place.  Nothing is returned.
;

;
; The "16-bit" variant: a 32-bit load, bswap with an operand-size prefix in
; front of it, and a 32-bit store.
;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; full 32 bits, just in case any of the upper bits are used.
        db      66h                     ; operand-size prefix applied to the bswap below
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16
1535
;
; Byte swaps the 32-bit value A0 points to, in place.
;
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; fetch
        bswap   T0_32                   ; reverse the four bytes
        mov     [A0], T0_32             ; write back
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32
1543
;
; Byte swaps the 64-bit value A0 points to, in place.
;
; On AMD64 a single 64-bit bswap does the job.  Otherwise each 32-bit half is
; swapped on its own and the two halves are stored back in exchanged positions.
;
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
        PROLOGUE_1_ARGS
%ifdef RT_ARCH_AMD64
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
%else
        mov     T0, [A0]                ; low dword
        mov     T1, [A0 + 4]            ; high dword
        bswap   T0
        bswap   T1
        mov     [A0 + 4], T0            ; swapped low dword becomes the high dword
        mov     [A0], T1                ; swapped high dword becomes the low dword
%endif
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u64
1562
1563
;;
; Prepares the host FPU for the instruction being emulated by loading parts of
; the guest's control word and status word into it.
;
; The current FPU environment is stored at [xSP], the FCW and FSW fields of
; that image are patched, and the image is loaded back with fldenv:
;   - FCW: the guest value, reduced to the exception mask, precision control
;          and rounding control bits.
;   - FSW: TOP is kept from the host image, the condition code bits (C0..C3)
;          are taken from the guest, all remaining bits end up zero.
;
; @uses     The environment image at [xSP] (fnstenv writes 28 bytes in the
;           32-bit protected mode layout, X86FSTENV32P); the callers in this
;           file reserve 20h bytes.  Clobbers T0, T1 and the flags.
; @param    1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - host TOP combined with the guest's C0, C1, C2 and C3 (which an
        ; instruction may leave undefined).
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]
%endmacro
1589
1590
;;
; Result of an FPU operation: an 80-bit value plus the resulting status word.
; The 80-bit value occupies bytes 0..9 and FSW follows at offset 10, giving a
; total size of 12 bytes.
;
; Need to move this as well somewhere better?
;
struc IEMFPURESULT
 .r80Result resw 5      ; the 80-bit result (5 words = 10 bytes)
 .FSW resw 1            ; the FPU status word
endstruc
1598
1599
;;
; Result of an FPU operation producing two 80-bit values plus a status word.
; The first value occupies bytes 0..9, FSW sits at offset 10 and the second
; value follows at offset 12.
;
; Need to move this as well somewhere better?
;
struc IEMFPURESULTTWO
 .r80Result1 resw 5     ; the first 80-bit result (5 words = 10 bytes)
 .FSW resw 1            ; the FPU status word
 .r80Result2 resw 5     ; the second 80-bit result
endstruc
1608
1609
1610;
1611;---------------------- 16-bit signed integer operations ----------------------
1612;
1613
1614
;;
; Converts a 16-bit signed integer to an 80-bit floating point value (fild).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the converted value.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i16_to_r80
1638
1639
;;
; Stores an 80-bit floating point value (register) as a 16-bit signed integer
; (memory).  The conversion is done with fistp.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A3]              ; ST0 = value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
1663
1664
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A3]              ; ST0 = value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        ; Must be the 16-bit form: a dword store would write four bytes to the
        ; two byte destination and apply the 32-bit range when checking for an
        ; invalid conversion.
        fisttp  word [A2]

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
1689
1690
;;
; Generates iemAImpl_<instr>_r80_by_i16: an FPU instruction combining an 80-bit
; value with a 16-bit signed integer memory operand and yielding an 80-bit
; result.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the result.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
1727
1728
;;
; Generates iemAImpl_<instr>_r80_by_i16 for an FPU instruction that takes an
; 80-bit value and a 16-bit signed integer memory operand and only produces a
; status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
1759
1760
1761
1762;
1763;---------------------- 32-bit signed integer operations ----------------------
1764;
1765
1766
;;
; Converts a 32-bit signed integer to an 80-bit floating point value (fild).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the converted value.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i32_to_r80
1790
1791
;;
; Stores an 80-bit floating point value (register) as a 32-bit signed integer
; (memory).  The conversion is done with fistp.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A3]              ; ST0 = value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
1815
1816
;;
; Stores an 80-bit floating point value (register) as a 32-bit signed integer
; (memory), truncating it (fisttp).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A3]              ; ST0 = value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
1841
1842
;;
; Generates iemAImpl_<instr>_r80_by_i32: an FPU instruction combining an 80-bit
; value with a 32-bit signed integer memory operand and yielding an 80-bit
; result.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the result.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
1879
1880
;;
; Generates iemAImpl_<instr>_r80_by_i32 for an FPU instruction that takes an
; 80-bit value and a 32-bit signed integer memory operand and only produces a
; status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
1911
1912
1913
1914;
1915;---------------------- 64-bit signed integer operations ----------------------
1916;
1917
1918
;;
; Converts a 64-bit signed integer to an 80-bit floating point value (fild).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the converted value.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i64_to_r80
1942
1943
;;
; Stores an 80-bit floating point value (register) as a 64-bit signed integer
; (memory).  The conversion is done with fistp.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A3]              ; ST0 = value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
1967
1968
;;
; Stores an 80-bit floating point value (register) as a 64-bit signed integer
; (memory), truncating it (fisttp).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A3]              ; ST0 = value to convert
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
1993
1994
1995
1996;
1997;---------------------- 32-bit floating point operations ----------------------
1998;
1999
;;
; Loads a 32-bit floating point value and converts it to 80-bit format (fld).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the converted value.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r32_to_r80
2023
2024
;;
; Stores an 80-bit floating point value (register) as a 32-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A3]              ; ST0 = value to store
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]              ; non-popping store; the fninit below resets the FPU anyway

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2048
2049
;;
; Generates iemAImpl_<instr>_r80_by_r32: an FPU instruction combining an 80-bit
; value with a 32-bit floating point memory operand and yielding an 80-bit
; result.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the result.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2086
2087
;;
; Generates iemAImpl_<instr>_r80_by_r32 for an FPU instruction that takes an
; 80-bit value and a 32-bit floating point memory operand and only produces a
; status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2118
2119
2120
2121;
2122;---------------------- 64-bit floating point operations ----------------------
2123;
2124
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        ; Start from a clean FPU like the other load workers (fld_r32_to_r80,
        ; fld_r80_from_r80, fild_*) do, so the load below does not run on
        ; whatever register stack and tag state the host happened to have.
        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the converted value.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2147
2148
;;
; Stores an 80-bit floating point value (register) as a 64-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A3]              ; ST0 = value to store
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]              ; non-popping store; the fninit below resets the FPU anyway

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2172
2173
;;
; Generates iemAImpl_<instr>_r80_by_r64: an FPU instruction combining an 80-bit
; value with a 64-bit floating point memory operand and yielding an 80-bit
; result.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the result.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
2210
;;
; Generates iemAImpl_<instr>_r80_by_r64 for an FPU instruction that takes an
; 80-bit value and a 64-bit floating point memory operand and only produces a
; status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A2]              ; ST0 = the 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
2241
2242
2243
2244;
2245;---------------------- 80-bit floating point operations ----------------------
2246;
2247
;;
; Loads an 80-bit floating point value from memory into an FPU register and
; hands it back together with the resulting status word.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit floating point value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     tword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the loaded value.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r80
2271
2272
;;
; Stores an 80-bit floating point register value to memory (fstp).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 80-bit value.
; @param    A3      Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A3]              ; ST0 = value to store
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fstp    tword [A2]

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r80
2296
2297
;;
; Generates iemAImpl_<instr>_r80_by_r80: an FPU instruction working on two
; 80-bit floating point values and yielding an 80-bit result taken from ST0.
;
; @param    1       The instruction
; @param    2       The operand list to put after the instruction, possibly
;                   empty ({}).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST0)
; @param    A3      Pointer to the second 80-bit value (STn).
;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A3]              ; pushed first, so it ends up in ST1
        fld     tword [A2]              ; pushed last, so it is ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      %2

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store ST0 as the result.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80 fadd,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fprem,  {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
2338
2339
;;
; Generates iemAImpl_<instr>_r80_by_r80 for an FPU instruction that works on
; ST1 and ST0, stores its result in ST1 and pops the stack, so that the result
; is on top of the stack afterwards.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST1).
; @param    A3      Pointer to the second 80-bit value (ST0).
;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A2]              ; pushed first, so it ends up in ST1
        fld     tword [A3]              ; pushed last, so it is ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]       ; status first ...
        fnclex                                      ; ... then drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the new top of stack.

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2374
2375
;;
; Generates iemAImpl_<instr>_r80_by_r80 for an FPU instruction that works on
; two 80-bit floating point values and only produces a status word.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image written at [xSP]

        fninit                          ; start from a clean FPU
        fld     tword [A3]              ; second value -> ST1
        fld     tword [A2]              ; first value  -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st0, st1

        fnstsw  word [A1]               ; hand the resulting status word to the caller

        fninit                          ; reset the FPU again before returning
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_FSW fcom
IEMIMPL_FPU_R80_BY_R80_FSW fucom
2408
2409
;;
; FPU instruction comparing two 80-bit floating point values and reporting
; the outcome both in FSW and in EFLAGS (handed back in eax).
;
; @param    1       The instruction
;
; @returns  EFLAGS in EAX.
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        ; 32 bytes of stack that this macro never references directly;
        ; presumably scratch space for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW, which
        ; is defined elsewhere - TODO confirm.
        sub     xSP, 20h

        ; Reset the FPU, then push [A3] followed by [A2], so [A2] is st0 and
        ; [A3] is st1.
        fninit
        fld     tword [A3]
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st1

        ; Grab FSW and then EFLAGS right away.  The flags have to be read
        ; before the add below, since that instruction rewrites them.
        fnstsw  word [A1]
        pushf
        pop     xAX

        ; Leave the FPU in its reset state and release the stack space.
        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_EFL fcomi
IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2445
2446
;;
; FPU instruction operating on a single 80-bit floating point value and
; producing a single 80-bit result.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        ; 32 bytes of stack that this macro never references directly;
        ; presumably scratch space for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW, which
        ; is defined elsewhere - TODO confirm.
        sub     xSP, 20h

        ; Reset the FPU and load the one operand into st0.
        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        ; Status word first (fnclex discards the exception flags), then the
        ; result is stored and popped.
        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        ; Leave the FPU in its reset state and release the stack space.
        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80 fchs
IEMIMPL_FPU_R80 fabs
IEMIMPL_FPU_R80 f2xm1
; NOTE(review): fyl2x reads both st0 and st1 and pops the stack, but this
; template loads only one value.  It looks like it belongs with fyl2xp1 under
; IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP.  Moving it renames the exported symbol,
; so check the C callers before changing it.
IEMIMPL_FPU_R80 fyl2x
IEMIMPL_FPU_R80 fsqrt
IEMIMPL_FPU_R80 frndint
IEMIMPL_FPU_R80 fsin
IEMIMPL_FPU_R80 fcos
2484
2485
;;
; FPU instruction examining a single 80-bit floating point value.  Only the
; resulting FSW is handed back; no floating point result is stored.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        ; 32 bytes of stack that this macro never references directly;
        ; presumably scratch space for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW, which
        ; is defined elsewhere - TODO confirm.
        sub     xSP, 20h

        ; Reset the FPU and load the one operand into st0.
        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        ; The status word is the only output.
        fnstsw  word [A1]

        ; Nothing is popped explicitly; fninit resets the FPU, which also
        ; discards the operand that was loaded.
        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80_FSW ftst
IEMIMPL_FPU_R80_FSW fxam
2516
2517
2518
;;
; FPU instruction loading an 80-bit floating point constant.
;
; The generated function is named after the instruction alone, with no
; operand-type suffix (for example iemAImpl_fld1).
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
;
%macro IEMIMPL_FPU_R80_CONST 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
        PROLOGUE_2_ARGS
        ; 32 bytes of stack that this macro never references directly;
        ; presumably scratch space for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW, which
        ; is defined elsewhere - TODO confirm.
        sub     xSP, 20h

        ; Reset the FPU; there is no input operand, the instruction pushes the
        ; constant onto the empty stack.
        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        ; Status word first (fnclex discards the exception flags), then the
        ; constant is stored and popped.
        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        ; Leave the FPU in its reset state and release the stack space.
        fninit
        add     xSP, 20h
        EPILOGUE_2_ARGS
        ; The name must match the BEGINPROC_FASTCALL line exactly.  The
        ; original had a dangling token-paste operator at the end of this line.
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_FPU_R80_CONST fld1
IEMIMPL_FPU_R80_CONST fldl2t
IEMIMPL_FPU_R80_CONST fldl2e
IEMIMPL_FPU_R80_CONST fldpi
IEMIMPL_FPU_R80_CONST fldlg2
IEMIMPL_FPU_R80_CONST fldln2
IEMIMPL_FPU_R80_CONST fldz
2553
2554
;;
; FPU instruction operating on a single 80-bit floating point value and
; outputting two 80-bit values.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULTTWO for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
        PROLOGUE_3_ARGS
        ; 32 bytes of stack that this macro never references directly;
        ; presumably scratch space for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW, which
        ; is defined elsewhere - TODO confirm.
        sub     xSP, 20h

        ; Reset the FPU and load the one operand into st0.
        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        ; Save the status word, then pop both outputs: the top of the stack
        ; goes to r80Result2 and the value beneath it to r80Result1.  The
        ; exception flags are cleared ahead of each store.
        fnstsw  word  [A1 + IEMFPURESULTTWO.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result2]
        fnclex
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result1]

        ; Leave the FPU in its reset state and release the stack space.
        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
%endmacro

IEMIMPL_FPU_R80_R80 fptan
IEMIMPL_FPU_R80_R80 fxtract
IEMIMPL_FPU_R80_R80 fsincos
2589
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use