VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 84044

Last change on this file since 84044 was 82968, checked in by vboxsync, 4 years ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 81.7 KB
Line 
1; $Id: IEMAllAImpl.asm 82968 2020-02-04 10:35:17Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6;
7; Copyright (C) 2011-2020 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17
18
19;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20; Header Files ;
21;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
22%include "VBox/asmdefs.mac"
23%include "VBox/err.mac"
24%include "iprt/x86.mac"
25
26
27;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28; Defined Constants And Macros ;
29;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30
;;
; Return instruction wrapper for fastcall workers.
;
; Emits 'ret %1' when building for 32-bit x86 on Windows and a plain 'ret' for
; every other architecture / OS combination.
;
; @param        1       The byte count handed to RET (only used on 32-bit Windows).
;
%macro RET_FASTCALL 1
 %ifndef RT_ARCH_X86
        ret
 %elifdef RT_OS_WINDOWS
        ret     %1
 %else
        ret
 %endif
%endmacro
45
;;
; Symbol name of a fastcall function.
;
; On 32-bit x86 Windows the name is assembled as <prefix><name>@<cbArgs>; on
; all other targets it is simply NAME(a_Name).
;
;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
;        escaping (or whatever the dollar is good for here).  Thus the ugly
;        prefix argument.
;
%ifdef RT_ARCH_X86
 %ifdef RT_OS_WINDOWS
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
 %else
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
 %endif
%else
 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
%endif
60
;;
; Opens a fastcall function: emits the export / global directives that apply
; to the current object format and then the function label itself.
;
; @param        1       The function name (C).
; @param        2       The argument size on x86.
;
%macro BEGINPROC_FASTCALL 2
 ; PE objects: export under the plain C name.
 %ifdef ASM_FORMAT_PE
  export %1=NAME_FASTCALL(%1,%2,$@)
 %endif
 ; OMF objects get an export only when assembling with NASM.
 %ifdef __NASM__
  %ifdef ASM_FORMAT_OMF
   export NAME(%1) NAME_FASTCALL(%1,%2,$@)
  %endif
 %endif
 ; Everything except flat binaries declares the symbol global.
 %ifndef ASM_FORMAT_BIN
  global NAME_FASTCALL(%1,%2,$@)
 %endif
 ; Note: the label uses the '@' prefix whereas the directives above use '$@'.
NAME_FASTCALL(%1,%2,@):
%endmacro
81
82
;
; We employ some macro assembly here to hide the calling convention differences.
;
; PROLOGUE_n_ARGS / EPILOGUE_n_ARGS[_EX] bracket a worker taking n arguments.
; A0..A3 name the argument registers, T0 and T1 the temporary registers.  The
; _EX epilogues take the byte count to hand to RET on 32-bit hosts.
;
%ifdef RT_ARCH_AMD64
 ;
 ; AMD64: the prologues emit nothing and all epilogues are a plain RET; the
 ; byte count given to the _EX variants is ignored.
 ;
 %macro PROLOGUE_1_ARGS 0
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        ret
 %endmacro
 ; Accepts an optional byte count so that invocations written for the 32-bit
 ; variant (one parameter) also assemble here; zero-argument use still works.
 %macro EPILOGUE_1_ARGS_EX 0-1
        ret
 %endmacro

 %macro PROLOGUE_2_ARGS 0
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_3_ARGS 0
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
        ret
 %endmacro

 %macro PROLOGUE_4_ARGS 0
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
        ret
 %endmacro

 ; Argument registers when ASM_CALL64_GCC is defined.
 %ifdef ASM_CALL64_GCC
  %define A0        rdi
  %define A0_32     edi
  %define A0_16     di
  %define A0_8      dil

  %define A1        rsi
  %define A1_32     esi
  %define A1_16     si
  %define A1_8      sil

  %define A2        rdx
  %define A2_32     edx
  %define A2_16     dx
  %define A2_8      dl

  %define A3        rcx
  %define A3_32     ecx
  %define A3_16     cx
 %endif

 ; Argument registers when ASM_CALL64_MSC is defined.
 %ifdef ASM_CALL64_MSC
  %define A0        rcx
  %define A0_32     ecx
  %define A0_16     cx
  %define A0_8      cl

  %define A1        rdx
  %define A1_32     edx
  %define A1_16     dx
  %define A1_8      dl

  %define A2        r8
  %define A2_32     r8d
  %define A2_16     r8w
  %define A2_8      r8b

  %define A3        r9
  %define A3_32     r9d
  %define A3_16     r9w
 %endif

 ; Temporary registers (same for both 64-bit conventions).
 %define T0         rax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         r11
 %define T1_32      r11d
 %define T1_16      r11w
 %define T1_8       r11b

%else
 ;
 ; x86: A0 and A1 are ecx and edx.  The 3 and 4 argument prologues load A2
 ; (ebx) and A3 (esi) from the stack after saving those registers.  edi is
 ; saved by every prologue because it serves as T1.
 ;
 %macro PROLOGUE_1_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_1_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_2_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 %macro PROLOGUE_3_ARGS 0
        push    ebx
        mov     ebx, [esp + 4 + 4]      ; saved ebx + return address -> 1st stack argument (A2)
        push    edi
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
  %if (%1) < 4
   %error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        EPILOGUE_3_ARGS_EX 4
 %endmacro

 %macro PROLOGUE_4_ARGS 0
        push    ebx
        push    edi
        push    esi
        mov     ebx, [esp + 12 + 4 + 0] ; three saved registers + return address -> 1st stack argument (A2)
        mov     esi, [esp + 12 + 4 + 4] ; 2nd stack argument (A3)
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
  %if (%1) < 8
   %error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     esi
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        EPILOGUE_4_ARGS_EX 8
 %endmacro

 %define A0         ecx
 %define A0_32      ecx
 %define A0_16      cx
 %define A0_8       cl

 %define A1         edx
 %define A1_32      edx
 %define A1_16      dx
 %define A1_8       dl

 %define A2         ebx
 %define A2_32      ebx
 %define A2_16      bx
 %define A2_8       bl

 %define A3         esi
 %define A3_32      esi
 %define A3_16      si

 %define T0         eax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         edi
 %define T1_32      edi
 %define T1_16      di
%endif
266
267
;;
; Merges the guest flags selected by (%2 | %3) from [%1] into the host EFLAGS.
;
; The check on the undefined flags (%3) that the name alludes to is commented
; out, so the merge is emitted unconditionally.
;
; @remarks      Clobbers T0, stack. Changes EFLAGS.
; @param        1       The parameter (A0..A3) pointing to the eflags.
; @param        2       The set of modified flags.
; @param        3       The set of undefined flags.
;
%macro IEM_MAYBE_LOAD_FLAGS 3
 ;%if (%3) != 0
        pushf                           ; host flags -> stack
        mov     T0_32, [%1]             ; T0 = guest flags
        and     dword [xSP], ~(%2 | %3) ; drop the selected bits from the host copy
        and     T0_32, (%2 | %3)        ; keep only the selected bits of the guest copy
        or      [xSP], T0               ; combine the two on the stack
        popf                            ; and make the result the current flags
 ;%endif
%endmacro
287
;;
; Writes the flags selected by (%2 | %3) from the current EFLAGS back to [%1],
; leaving all other bits of [%1] as they were.  Emits nothing when the combined
; mask is zero.
;
; @remarks      Clobbers T0, T1, stack.
; @param        1       The register pointing to the EFLAGS.
; @param        2       The mask of modified flags to save.
; @param        3       The mask of undefined flags to (maybe) save.
;
%macro IEM_SAVE_FLAGS 3
 %if (%2 | %3) != 0
        pushf
        pop     T1                      ; T1 = current flags
        and     T1_32, (%2 | %3)        ; only the modified and undefined ones
        mov     T0_32, [%1]             ; T0 = stored flags
        and     T0_32, ~(%2 | %3)       ; make room for the new bits
        or      T0_32, T1_32            ; merge
        mov     [%1], T0_32             ; store the result
 %endif
%endmacro
307
308
;;
; Generates the workers for a binary (two operand) instruction.
;
; Emits iemAImpl_<mnemonic>_u8, _u16 and _u32 and, on AMD64 hosts only, _u64.
; When parameter 2 is non-zero a matching set of _locked workers using the
; LOCK prefix is emitted too.  64-bit accesses on 32-bit hosts require hand
; coding and are not generated here.
;
; Each worker gets a pointer to the destination memory operand in A0, the
; source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIN_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 byte [A0], A1_8
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro

;            instr, lock, modified-flags, undefined-flags.
IEMIMPL_BIN_OP add,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP adc,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sub,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sbb,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP or,   1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP xor,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP and,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP cmp,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
408
409
;;
; Generates the workers for a bit test instruction (four parameter form).
;
; Emits iemAImpl_<mnemonic>_u16 and _u32 and, on AMD64 hosts only, _u64.  When
; parameter 2 is non-zero the corresponding _locked workers using the LOCK
; prefix are emitted as well.  64-bit accesses on 32-bit hosts require hand
; coding and are not generated here.
;
; Each worker gets a pointer to the destination memory operand in A0, the
; source register operand in A1 and a pointer to eflags in A2.
;
; @param        1       The instruction mnemonic.
; @param        2       Non-zero if there should be a locked version.
; @param        3       The modified flags.
; @param        4       The undefined flags.
;
%macro IEMIMPL_BIT_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

 %if %2 != 0 ; locked versions requested?

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS       A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro
IEMIMPL_BIT_OP bt,  0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
486
;;
; Generates the workers for a bit search instruction (three parameter form of
; IEMIMPL_BIT_OP; the four parameter form is a separate overload).
;
; Emits iemAImpl_<mnemonic>_u16 and _u32 and, on AMD64 hosts only, _u64.
; 64-bit accesses on 32-bit hosts require hand coding and are not generated
; here.
;
; Each worker gets a pointer to the destination operand in A0, the source
; value in A1 and a pointer to eflags in A2.  The instruction is executed with
; T0 as its destination; [A0] is only written when the instruction leaves ZF
; clear.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_BIT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_16, A1_16
        jz      .unchanged_dst          ; ZF set: leave the destination alone
        mov     [A0], T0_16
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0_32, A1_32
        jz      .unchanged_dst          ; ZF set: leave the destination alone
        mov     [A0], T0_32
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        %1      T0, A1
        jz      .unchanged_dst          ; ZF set: leave the destination alone
        mov     [A0], T0
.unchanged_dst:
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64
%endmacro
IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
539
540
;
; Two operand IMUL.  Similar to the binary operators, yet different: there is
; no locked variant and the multiplication itself targets a register.
; The rDX:rAX variant of imul is handled together with mul further down.
;
; A0 points to the destination value, A1 holds the other factor and A2 points
; to eflags.  The product is computed in A1 and then stored to [A0].
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_16, word [A0]
        mov     [A0], A1_16             ; write the product back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u16

BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1_32, dword [A0]
        mov     [A0], A1_32             ; write the product back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u32

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        imul    A1, qword [A0]
        mov     [A0], A1                ; write the product back
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_imul_two_u64
%endif ; RT_ARCH_AMD64
574
575
;
; XCHG for memory operands.  This implies locking.  No flag changes.
;
; Each worker takes two arguments: A0 points to the memory operand and A1 to
; the register value.  The two values are swapped via T0.  They all return
; void.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
        PROLOGUE_2_ARGS
        mov     T0_8, [A1]              ; T0 = register value
        xchg    T0_8, [A0]              ; swap with memory
        mov     [A1], T0_8              ; old memory value -> register
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u8

BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]             ; T0 = register value
        xchg    T0_16, [A0]             ; swap with memory
        mov     [A1], T0_16             ; old memory value -> register
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u16

BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]             ; T0 = register value
        xchg    T0_32, [A0]             ; swap with memory
        mov     [A1], T0_32             ; old memory value -> register
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u32

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
        PROLOGUE_2_ARGS
        mov     T0, [A1]                ; T0 = register value
        xchg    T0, [A0]                ; swap with memory
        mov     [A1], T0                ; old memory value -> register
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u64
%endif
616
617
;
; XADD for memory operands.
;
; Each worker takes three arguments: A0 points to the memory/register
; destination, A1 points to the register operand and A2 points to eflags.
; The register value is loaded into T0, XADD'ed with [A0] and the resulting T0
; is written back to [A1].  They all return void.
;

; The flags modified by XADD; only needed while generating the workers below.
%define IEM_XADD_EFL_MODIFIED (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)

BEGINCODE
BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_8, [A1]
        xadd    [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8

BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_16, [A1]
        xadd    [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16

BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_32, [A1]
        xadd    [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0, [A1]
        xadd    [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u64
%endif ; RT_ARCH_AMD64

BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_8, [A1]
        lock xadd [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_16, [A1]
        lock xadd [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16_locked

BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0_32, [A1]
        lock xadd [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32_locked

%ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, IEM_XADD_EFL_MODIFIED, 0
        mov     T0, [A1]
        lock xadd [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, IEM_XADD_EFL_MODIFIED, 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u64_locked
%endif ; RT_ARCH_AMD64

%undef IEM_XADD_EFL_MODIFIED
709
710
;
; CMPXCHG8B.
;
; The instruction dictates the use of eax, ebx, ecx and edx, which collides
; with the argument registers, so the code is written out separately for each
; calling convention.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
;                                             uint32_t *pEFlags));
;
; Note! Identical to iemAImpl_cmpxchg16b.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_MSC
        ; In: rcx=pu64Dst, rdx=pu64EaxEdx, r8=pu64EbxEcx, r9=pEFlags.
        push    rbx

        mov     r11, rdx                ; pu64EaxEdx (is also T1)
        mov     r10, rcx                ; pu64Dst

        mov     ebx, [r8]               ; ecx:ebx = replacement value
        mov     ecx, [r8 + 4]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [r11]              ; edx:eax = comparand
        mov     edx, [r11 + 4]

        lock cmpxchg8b [r10]

        mov     [r11], eax              ; hand edx:eax back to the caller
        mov     [r11 + 4], edx
        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        ; In: rdi=pu64Dst, rsi=pu64EaxEdx, rdx=pu64EbxEcx, rcx=pEFlags.
        push    rbx

        mov     r10, rcx                ; pEFlags
        mov     r11, rdx                ; pu64EbxEcx (is also T1)

        mov     ebx, [r11]              ; ecx:ebx = replacement value
        mov     ecx, [r11 + 4]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [rsi]              ; edx:eax = comparand
        mov     edx, [rsi + 4]

        lock cmpxchg8b [rdi]

        mov     [rsi], eax              ; hand edx:eax back to the caller
        mov     [rsi + 4], edx
        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
%else
        ; In: ecx=pu64Dst, edx=pu64EaxEdx, stack: pu64EbxEcx, pEFlags.
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64EaxEdx
        mov     ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]              ; ecx:ebx = replacement value
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0  ; clobbers T0 (eax)
        mov     eax, [esi]              ; edx:eax = comparand
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        mov     [esi], eax              ; hand edx:eax back to the caller
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8
%endif
ENDPROC iemAImpl_cmpxchg8b

BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
        ; Lazy bird always lock prefixes cmpxchg8b.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
ENDPROC iemAImpl_cmpxchg8b_locked
805
%ifdef RT_ARCH_AMD64

;
; CMPXCHG16B.
;
; The instruction dictates the use of rax, rbx, rcx and rdx, which collides
; with the argument registers, so the code is written out separately for each
; calling convention.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu1284RaxRdx, PRTUINT128U pu128RbxRcx,
;                                              uint32_t *pEFlags));
;
; Note! Identical to iemAImpl_cmpxchg8b.
;
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_cmpxchg16b, 16
 %ifdef ASM_CALL64_MSC
        ; In: rcx=pu128Dst, rdx=pu128RaxRdx, r8=pu128RbxRcx, r9=pEFlags.
        push    rbx

        mov     r11, rdx                ; pu64RaxRdx (is also T1)
        mov     r10, rcx                ; pu64Dst

        mov     rbx, [r8]               ; rcx:rbx = replacement value
        mov     rcx, [r8 + 8]
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     rax, [r11]              ; rdx:rax = comparand
        mov     rdx, [r11 + 8]

        lock cmpxchg16b [r10]

        mov     [r11], rax              ; hand rdx:rax back to the caller
        mov     [r11 + 8], rdx
        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        ; In: rdi=pu128Dst, rsi=pu128RaxRdx, rdx=pu128RbxRcx, rcx=pEFlags.
        push    rbx

        mov     r10, rcx                ; pEFlags
        mov     r11, rdx                ; pu64RbxRcx (is also T1)

        mov     rbx, [r11]              ; rcx:rbx = replacement value
        mov     rcx, [r11 + 8]
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     rax, [rsi]              ; rdx:rax = comparand
        mov     rdx, [rsi + 8]

        lock cmpxchg16b [rdi]

        ; Store both full 64-bit halves.  Using eax/edx here would write back
        ; only the low 32 bits of each half of the 128-bit value.
        mov     [rsi], rax
        mov     [rsi + 8], rdx
        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
ENDPROC iemAImpl_cmpxchg16b

BEGINPROC_FASTCALL iemAImpl_cmpxchg16b_locked, 16
        ; Lazy bird always lock prefixes cmpxchg16b.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg16b,16,$@)
ENDPROC iemAImpl_cmpxchg16b_locked

%endif ; RT_ARCH_AMD64
874
875
;
; CMPXCHG.
;
; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t *puEax, uintX_t uReg, uint32_t *pEFlags));
;
; A0 points to the destination, A1 points to the accumulator value (loaded
; before and stored back after the instruction), A2 holds the replacement
; value and A3 points to eflags.  On 32-bit hosts the 64-bit variant receives
; a pointer to the replacement value instead and is built on cmpxchg8b.
;
; @param        1       Instruction prefix: empty or 'lock'.
; @param        2       Function name suffix: empty or '_locked'.
;
BEGINCODE
%macro IEMIMPL_CMPXCHG 2
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     al, [A1]
        %1 cmpxchg [A0], A2_8
        mov     [A1], al
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u8 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     ax, [A1]
        %1 cmpxchg [A0], A2_16
        mov     [A1], ax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u16 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [A1]
        %1 cmpxchg [A0], A2_32
        mov     [A1], eax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u32 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
%ifdef RT_ARCH_AMD64
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     rax, [A1]
        %1 cmpxchg [A0], A2
        mov     [A1], rax
        IEM_SAVE_FLAGS       A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
%else
        ;
        ; Must use cmpxchg8b here.  See also iemAImpl_cmpxchg8b.
        ;
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64Rax
        mov     ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0  ; clobbers T0 (eax)
        mov     eax, [esi]
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
        ; ZF is set when the operands matched and clear on a mismatch, so it
        ; is the mismatch (ZF clear) that must take the explicit compare path.
        ; Falling through on a mismatch would have 'cmp eax, eax' force ZF=1.
        jnz     .cmpxchg8b_not_equal
        cmp     eax, eax                ; equal: just set the other flags.
.store:
        mov     [esi], eax
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8

.cmpxchg8b_not_equal:
        ; [esi] still holds the old accumulator value while edx:eax now holds
        ; the destination value; compare the high halves first, then the low.
        cmp     [esi + 4], edx  ;; @todo FIXME - verify 64-bit compare implementation
        jne     .store
        cmp     [esi], eax
        jmp     .store

%endif
ENDPROC iemAImpl_cmpxchg_u64 %+ %2
%endmacro ; IEMIMPL_CMPXCHG

IEMIMPL_CMPXCHG , ,
IEMIMPL_CMPXCHG lock, _locked
973
;;
; Generates the workers for a unary (single operand) instruction.
;
; Emits plain and _locked workers named iemAImpl_<mnemonic>_u8, _u16 and _u32
; and, on AMD64 hosts only, _u64.  64-bit accesses on 32-bit hosts require
; hand coding and are not generated here.
;
; Each worker takes two arguments: a pointer to the memory operand in A0 and
; a pointer to eflags in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
%macro IEMIMPL_UNARY_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 byte [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 word [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 dword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        %1      qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
        PROLOGUE_2_ARGS
        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
        lock %1 qword [A0]
        IEM_SAVE_FLAGS       A1, %2, %3
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_UNARY_OP not, 0, 0
1062
1063
;;
; Generates a worker that executes a single memory fence instruction and
; returns.
;
; The worker is named iemAImpl_<instruction>; it has no operands and no
; return value.
;
; @param        1       The instruction.
;
%macro IEMIMPL_MEM_FENCE 1
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
        %1
        ret
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_MEM_FENCE lfence
IEMIMPL_MEM_FENCE sfence
IEMIMPL_MEM_FENCE mfence
1082
;;
; Memory fence alternative for hosts without SSE2.
;
; Pushes xAX, exchanges it with the stack slot just written (an XCHG with a
; memory operand) and then releases the slot again.
;
BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
        push    xAX
        xchg    [xSP], xAX              ; swaps xAX with the copy just pushed
        add     xSP, xCB                ; drop the scratch slot
        ret
ENDPROC iemAImpl_alt_mem_fence
1092
1093
1094
;;
; Generates the workers for a shift / rotate instruction.
;
; Emits iemAImpl_<mnemonic>_u8, _u16 and _u32 and, on AMD64 hosts only, _u64.
; 64-bit accesses on 32-bit hosts require hand coding and are not generated
; here.
;
; Each worker gets a pointer to the destination memory operand in A0, the
; shift count in A1 and a pointer to eflags in A2.
;
; The count has to end up in cl.  With ASM_CALL64_GCC it is copied there from
; A1; otherwise A0 is the xCX register, so A0 and A1 are exchanged, which puts
; the count in cl and the operand pointer in A1.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
;
%macro IEMIMPL_SHIFT_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      byte [A0], cl
 %else
        xchg    A1, A0
        %1      byte [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      word [A0], cl
 %else
        xchg    A1, A0
        %1      word [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      dword [A0], cl
 %else
        xchg    A1, A0
        %1      dword [A1], cl
 %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     cl, A1_8
        %1      qword [A0], cl
  %else
        xchg    A1, A0
        %1      qword [A1], cl
  %endif
        IEM_SAVE_FLAGS       A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1179
1180
;;
; Macro for implementing a double precision shift operation.
;
; Expands to the 16, 32 and 64 bit workers.  The 64-bit worker is only emitted
; for AMD64 hosts; on 32-bit hosts the 64-bit access requires hand coding.
;
; The functions take a pointer to the destination operand (r/m) in A0, the
; source (reg) in A1, the shift count in A2 and a pointer to the eflags
; variable/register in A3.
;
; The count must be in CL for the instruction.  With ASM_CALL64_GCC, A3 and A2
; are swapped around the instruction (relies on A3 being xCX) and swapped back
; so IEM_SAVE_FLAGS sees the eflags pointer in A3 again.  Otherwise A0 and A2
; are swapped (relies on A0 being xCX) and the destination is addressed via A2.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> cl
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; restore the eflags pointer in A3
 %else
        xchg    A0, A2                  ; count -> cl, destination pointer -> A2
        %1      [A2], A1_16, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> cl
        %1      [A0], A1_32, cl
        xchg    A3, A2                  ; restore the eflags pointer in A3
 %else
        xchg    A0, A2                  ; count -> cl, destination pointer -> A2
        %1      [A2], A1_32, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> cl
        %1      [A0], A1, cl
        xchg    A3, A2                  ; restore the eflags pointer in A3
  %else
        xchg    A0, A2                  ; count -> cl, destination pointer -> A2
        %1      [A2], A1, cl
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1249
1250
;;
; Macro for implementing multiplication operations.
;
; Expands to the 8, 16, 32 and 64 bit workers.  The 64-bit worker is only
; emitted for AMD64 hosts; on 32-bit hosts the 64-bit access requires hand
; coding.
;
; The 8-bit function only operates on AX, so it takes no DX pointer: A0 is the
; pointer to AX, A1 the operand and A2 the pointer to eflags.  The other
; functions take a pointer to rAX in A0, a pointer to rDX in A1, the operand
; in A2 and a pointer to eflags in A3.
;
; The functions all return 0, so the caller can use the same calling code for
; mul/imul as for div/idiv.
;
; Outside ASM_CALL64_GCC the rDX pointer is copied from A1 to T1 before the
; multiplication and the high result is stored through T1.
; NOTE(review): presumably because A1 is xDX there and gets overwritten by the
;               result - confirm against the A1 definition.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; The full 16-bit product goes back to AX.
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; Keep the rDX pointer in T1 across the multiply.
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; Keep the rDX pointer in T1 across the multiply.
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
  %ifdef ASM_CALL64_GCC
        %1      A2
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; Keep the rDX pointer in T1 across the multiply.
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1346
1347
BEGINCODE
;;
; Worker function for negating the number held in T1:T0, where T0_32 is the
; low half and T1_32 the high half (the borrow propagates from T0 to T1).
;
; The value is computed as 0 - T1:T0 using two zeroed stack slots, so no
; register other than T0 and T1 is modified.  EFLAGS are clobbered.
;
; @uses     None (T0,T1)
iemAImpl_negate_T0_T1_u32:
        push    0                       ; Slot that will hold the old high half.
        push    0                       ; Slot that will hold the old low half.
        xchg    T0_32, [xSP]            ; T0 = 0, [xSP] = old low half.
        xchg    T1_32, [xSP + xCB]      ; T1 = 0, [xSP + xCB] = old high half.
        sub     T0_32, [xSP]            ; Low half of 0 - value.
        sbb     T1_32, [xSP + xCB]      ; High half including the borrow.
        add     xSP, xCB*2              ; Drop both slots.
        ret
1361
%ifdef RT_ARCH_AMD64
;;
; Worker function for negating the number held in T1:T0, where T0 is the low
; 64-bit half and T1 the high 64-bit half (the borrow propagates from T0 to
; T1).
;
; The value is computed as 0 - T1:T0 using two zeroed stack slots, so no
; register other than T0 and T1 is modified.  EFLAGS are clobbered.
;
; @uses     None (T0,T1)
iemAImpl_negate_T0_T1_u64:
        push    0                       ; Slot that will hold the old high half.
        push    0                       ; Slot that will hold the old low half.
        xchg    T0, [xSP]               ; T0 = 0, [xSP] = old low half.
        xchg    T1, [xSP + xCB]         ; T1 = 0, [xSP + xCB] = old high half.
        sub     T0, [xSP]               ; Low half of 0 - value.
        sbb     T1, [xSP + xCB]         ; High half including the borrow.
        add     xSP, xCB*2              ; Drop both slots.
        ret
%endif
1376
1377
;;
; Macro for implementing division operations.
;
; Expands to the 8, 16, 32 and 64 bit workers.  The 64-bit worker is only
; emitted for AMD64 hosts; on 32-bit hosts the 64-bit access requires hand
; coding.
;
; The 8-bit function only operates on AX, so it takes no DX pointer: A0 is the
; pointer to AX, A1 the divisor and A2 the pointer to eflags.  The other
; functions take a pointer to rAX in A0, a pointer to rDX in A1, the divisor
; in A2 and a pointer to eflags in A3.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  To be able to do that, division by zero and quotient
; overflow are detected BEFORE the host instruction is executed, as executing
; it would raise the divide error on the host instead.
;
; Signed overflow check: the magnitudes of the dividend and the divisor are
; compared after shifting the dividend right by (result width - 1) bits.  The
; most negative dividend is its own negation, and shifting it would move its
; only set bit out of the register being compared.  It can never produce a
; quotient that fits, so it is rejected right after the negation.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
; @param        4       1 if signed, 0 if unsigned.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_DIV_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; div by chainsaw check.
        test    A1_8, A1_8
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A0 + 1], A1_8          ; High byte of the dividend must be below the divisor.
        jae     .div_overflow
 %else
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A1                  ; T1 = saved divisor (because of missing T1_8 in 32-bit)
        test    A1_8, A1_8
        js      .divisor_negative
        test    T0_16, T0_16
        jns     .both_positive
        neg     T0_16
        js      .div_overflow           ; Dividend was 0x8000: still negative, quotient cannot fit.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_8, 0x7f              ; Special case for covering (divisor - 1).
        cmp     T0_8, A1_8
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A1_8
        test    T0_16, T0_16
        jns     .one_of_each
        neg     T0_16
        js      .div_overflow           ; Dividend was 0x8000: still negative, quotient cannot fit.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        jae     .div_overflow
.div_no_overflow:
        mov     A1, T1                  ; restore divisor
 %endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; return -1, the caller raises the divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_16, A2_16
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_16             ; High word of the dividend must be below the divisor.
        jae     .div_overflow
 %else
        mov     T0_16, [A1]
        shl     T0_32, 16
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A2                  ; T1 = divisor
        test    T1_16, T1_16
        js      .divisor_negative
        test    T0_32, T0_32
        jns     .both_positive
        neg     T0_32
        js      .div_overflow           ; Dividend was 0x80000000: still negative, quotient cannot fit.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_16, 0x7fff           ; Special case for covering (divisor - 1).
        cmp     T0_16, T1_16
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     T1_16
        test    T0_32, T0_32
        jns     .one_of_each
        neg     T0_32
        js      .div_overflow           ; Dividend was 0x80000000: still negative, quotient cannot fit.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        jae     .div_overflow
.div_no_overflow:
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor -> T1 before dx is loaded.
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; rDX pointer -> T1 before dx is loaded.
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; return -1, the caller raises the divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_32, A2_32
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_32             ; High dword of the dividend must be below the divisor.
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we can modify it (we are out of regs on x86).
        mov     T0_32, [A0]             ; T0 = dividend low
        mov     T1_32, [A1]             ; T1 = dividend high
        test    A2_32, A2_32
        js      .divisor_negative
        test    T1_32, T1_32
        jns     .both_positive
        call    iemAImpl_negate_T0_T1_u32
        test    T1_32, T1_32            ; The worker clobbers the flags, so test explicitly.
        js      .div_overflow           ; Dividend was the most negative value, quotient cannot fit.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_32, 0x7fffffff       ; Special case for covering (divisor - 1).
        cmp     T0_32, A2_32
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2_32
        test    T1_32, T1_32
        jns     .one_of_each
        call    iemAImpl_negate_T0_T1_u32
        test    T1_32, T1_32            ; The worker clobbers the flags, so test explicitly.
        js      .div_overflow           ; Dividend was the most negative value, quotient cannot fit.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        jae     .div_overflow
.div_no_overflow:
        pop     A2
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor -> T1 before edx is loaded.
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; rDX pointer -> T1 before edx is loaded.
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0

.return:
        EPILOGUE_4_ARGS

.div_overflow:
 %if %4 != 0
        pop     A2                      ; Balance the push done by the signed overflow check.
 %endif
.div_zero:
        mov     eax, -1                 ; return -1, the caller raises the divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2, A2
        jz      .div_zero
  %if %4 == 0
        cmp     [A1], A2                ; High qword of the dividend must be below the divisor.
        jae     .div_overflow
  %else
        push    A2                      ; save A2 so we can modify it (we are out of regs on x86).
        mov     T0, [A0]                ; T0 = dividend low
        mov     T1, [A1]                ; T1 = dividend high
        test    A2, A2
        js      .divisor_negative
        test    T1, T1
        jns     .both_positive
        call    iemAImpl_negate_T0_T1_u64
        test    T1, T1                  ; The worker clobbers the flags, so test explicitly.
        js      .div_overflow           ; Dividend was the most negative value, quotient cannot fit.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        pop     T0
        jb      .div_no_overflow
        ja      .div_overflow
        mov     T1, 0x7fffffffffffffff
        and     T0, T1                  ; Special case for covering (divisor - 1).
        cmp     T0, A2
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2
        test    T1, T1
        jns     .one_of_each
        call    iemAImpl_negate_T0_T1_u64
        test    T1, T1                  ; The worker clobbers the flags, so test explicitly.
        js      .div_overflow           ; Dividend was the most negative value, quotient cannot fit.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        jae     .div_overflow
.div_no_overflow:
        pop     A2
  %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor -> T1 before rdx is loaded.
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; rDX pointer -> T1 before rdx is loaded.
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0

.return:
        EPILOGUE_4_ARGS_EX 12

.div_overflow:
  %if %4 != 0
        pop     A2                      ; Balance the push done by the signed overflow check.
  %endif
.div_zero:
        mov     eax, -1                 ; return -1, the caller raises the divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1702
1703
;
; BSWAP.  No flag changes.
;
; Each function takes one argument, a pointer to the value to bswap
; (input/output).  They all return void.
;

;;
; BSWAP with a 16-bit operand size.
;
; A full dword is loaded and stored, and the instruction is emitted as the
; 32-bit register form preceded by a hand-coded operand size prefix (66h).
; NOTE(review): presumably this is done so the guest observes whatever the host
;               CPU does for the 16-bit form - confirm the intent.
;
; @param        A0      Pointer to the value (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; just in case any of the upper bits are used.
        db      66h                     ; Operand size prefix for the following bswap.
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16
1718
;;
; BSWAP of a 32-bit value: load, byte swap in T0_32, store back.
;
; @param        A0      Pointer to the 32-bit value (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32
1726
;;
; BSWAP of a 64-bit value.
;
; On AMD64 hosts this is a single 64-bit bswap.  Otherwise each 32-bit half is
; byte swapped on its own and the two halves are stored in exchanged positions.
;
; @param        A0      Pointer to the 64-bit value (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
        PROLOGUE_1_ARGS
%ifdef RT_ARCH_AMD64
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
%else
        mov     T0, [A0]                ; T0 = low dword
        mov     T1, [A0 + 4]            ; T1 = high dword
        bswap   T0
        bswap   T1
        mov     [A0 + 4], T0            ; Swapped low dword becomes the high dword.
        mov     [A0], T1                ; Swapped high dword becomes the low dword.
%endif
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u64
1745
1746
;;
; Initialize the FPU for the actual instruction being emulated, this means
; loading parts of the guest's control word and status word.
;
; The current FPU environment is stored at [xSP], the FCW and FSW fields of
; that image are patched and the image is loaded back:
;   - FCW: the guest value restricted to the exception mask, precision control
;          and rounding control bits.
;   - FSW: the TOP field of the stored (host) value combined with the guest's
;          C0, C1, C2 and C3 bits.
;
; @uses     28 bytes of stack at [xSP] (the FNSTENV image, X86FSTENV32P
;           layout); the caller must have reserved them.  Clobbers T0 and T1.
; @param    1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - for undefined C0, C1, C2, and C3.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK                      ; Guest condition code bits.
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK                    ; TOP from the stored environment.
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]
%endmacro
1772
1773
;;
; Assembly view of the FPU result structure the workers write to: a 80-bit
; result value (5 words) followed by the output FSW (1 word).
;
; Need to move this as well somewhere better?
;
struc IEMFPURESULT
    .r80Result  resw 5                  ; 80-bit floating point result.
    .FSW        resw 1                  ; Output FPU status word.
endstruc
1781
1782
;;
; Assembly view of the FPU result structure with two 80-bit values: the first
; result (5 words), the output FSW (1 word) and the second result (5 words).
;
; Need to move this as well somewhere better?
;
struc IEMFPURESULTTWO
    .r80Result1 resw 5                  ; First 80-bit floating point result.
    .FSW        resw 1                  ; Output FPU status word.
    .r80Result2 resw 5                  ; Second 80-bit floating point result.
endstruc
1791
1792
1793;
1794;---------------------- 16-bit signed integer operations ----------------------
1795;
1796
1797
;;
; Converts a 16-bit signed integer value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i16_to_r80
1821
1822
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
;
; The value is loaded first, then the guest control/status word bits are
; applied, so the store itself runs with the guest settings.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
1846
1847
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; The store uses the 16-bit form of FISTTP so that exactly two bytes are
; written at A2 and the range check is done against the 16-bit integer range.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]               ; 16-bit destination, not dword.

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
1872
1873
;;
; FPU instruction working on one 80-bit and one 16-bit signed integer value.
;
; The 80-bit value is loaded onto the FPU stack, the guest control/status word
; bits are applied, and the instruction is executed with the 16-bit integer as
; memory operand.  FSW is captured before the result is popped into the
; IEMFPURESULT.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
1910
1911
;;
; FPU instruction working on one 80-bit and one 16-bit signed integer value,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
1942
1943
1944
1945;
1946;---------------------- 32-bit signed integer operations ----------------------
1947;
1948
1949
;;
; Converts a 32-bit signed integer value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i32_to_r80
1973
1974
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer (memory).
;
; The value is loaded first, then the guest control/status word bits are
; applied, so the store itself runs with the guest settings.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
1998
1999
;;
; Store a 80-bit floating point value (register) as a 32-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
2024
2025
;;
; FPU instruction working on one 80-bit and one 32-bit signed integer value.
;
; The 80-bit value is loaded onto the FPU stack, the guest control/status word
; bits are applied, and the instruction is executed with the 32-bit integer as
; memory operand.  FSW is captured before the result is popped into the
; IEMFPURESULT.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
2062
2063
;;
; FPU instruction working on one 80-bit and one 32-bit signed integer value,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
2094
2095
2096
2097;
2098;---------------------- 64-bit signed integer operations ----------------------
2099;
2100
2101
;;
; Converts a 64-bit signed integer value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i64_to_r80
2125
2126
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer (memory).
;
; The value is loaded first, then the guest control/status word bits are
; applied, so the store itself runs with the guest settings.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
2150
2151
;;
; Store a 80-bit floating point value (register) as a 64-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
2176
2177
2178
2179;
2180;---------------------- 32-bit floating point operations ----------------------
2181;
2182
;;
; Converts a 32-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r32_to_r80
2206
2207
;;
; Store a 80-bit floating point value (register) as a 32-bit one (memory).
;
; The value is loaded first, then the guest control/status word bits are
; applied, so the store itself runs with the guest settings.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2231
2232
;;
; FPU instruction working on one 80-bit and one 32-bit floating point value.
;
; The 80-bit value is loaded onto the FPU stack, the guest control/status word
; bits are applied, and the instruction is executed with the 32-bit value as
; memory operand.  FSW is captured before the result is popped into the
; IEMFPURESULT.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2269
2270
;;
; FPU instruction working on one 80-bit and one 32-bit floating point value,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2301
2302
2303
2304;
2305;---------------------- 64-bit floating point operations ----------------------
2306;
2307
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; The FPU is reset with fninit before the guest control/status word bits are
; applied, so the load starts from a known, empty FPU register stack.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit                          ; Do not inherit the state the FPU was left in.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2330
2331
;;
; Store a 80-bit floating point value (register) as a 64-bit one (memory).
;
; The value is loaded first, then the guest control/status word bits are
; applied, so the store itself runs with the guest settings.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A3]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2355
2356
;;
; FPU instruction working on one 80-bit and one 64-bit floating point value.
;
; The 80-bit value is loaded onto the FPU stack, the guest control/status word
; bits are applied, and the instruction is executed with the 64-bit value as
; memory operand.  FSW is captured before the result is popped into the
; IEMFPURESULT.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Room for the FPU environment image.

        fninit
        fld     tword [A2]
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
2393
2394;;
2395; FPU instruction working on one 80-bit and one 64-bit floating point value,
2396; only returning FSW.
2397;
2398; @param 1 The instruction
2399;
2400; @param A0 FPU context (fxsave).
2401; @param A1 Where to store the output FSW.
2402; @param A2 Pointer to the 80-bit value.
2403; @param A3 Pointer to the 64-bit value.
2404;
2405%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
; Emits iemAImpl_<instruction>_r80_by_r64 for instructions whose only output is
; the status word (no value is stored back).
2406BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2407 PROLOGUE_4_ARGS
2408 sub xSP, 20h ; scratch stack space, released again before the epilogue
2409
2410 fninit ; start with an empty x87 register stack
2411 fld tword [A2] ; ST0 = the 80-bit operand
2412 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; presumably loads FCW and a sanitized FSW from the fxsave image in A0 (macro defined elsewhere)
2413 %1 qword [A3] ; the instruction under emulation, with the 64-bit value as memory operand
2414
2415 fnstsw word [A1] ; the resulting status word is the only output
2416
2417 fninit ; leave the x87 unit in its initialized state
2418 add xSP, 20h
2419 EPILOGUE_4_ARGS
2420ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2421%endmacro
2422
2423IEMIMPL_FPU_R80_BY_R64_FSW fcom
2424
2425
2426
2427;
2428;---------------------- 80-bit floating point operations ----------------------
2429;
2430
2431;;
2432; Loads a 80-bit floating point register value from memory.
2433;
2434; @param A0 FPU context (fxsave).
2435; @param A1 Pointer to a IEMFPURESULT for the output.
2436; @param A2 Pointer to the 80-bit floating point value to load.
2437;
2438BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
2439 PROLOGUE_3_ARGS
2440 sub xSP, 20h ; scratch stack space, released again before the epilogue
2441
2442 fninit ; start with an empty x87 register stack
2443 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; presumably loads FCW and a sanitized FSW from the fxsave image in A0 (macro defined elsewhere)
2444 fld tword [A2] ; ST0 = the 80-bit value from memory
2445
2446 fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the load
2447 fnclex ; drop pending exception flags so the fstp below does not raise them
2448 fstp tword [A1 + IEMFPURESULT.r80Result] ; store the loaded value as the result and pop
2449
2450 fninit ; leave the x87 unit in its initialized state
2451 add xSP, 20h
2452 EPILOGUE_3_ARGS
2453ENDPROC iemAImpl_fld_r80_from_r80
2454
2455
2456;;
2457; Store a 80-bit floating point register to memory
2458;
2459; @param A0 FPU context (fxsave).
2460; @param A1 Where to return the output FSW.
2461; @param A2 Where to store the 80-bit value.
2462; @param A3 Pointer to the 80-bit register value.
2463;
2464BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
2465 PROLOGUE_4_ARGS
2466 sub xSP, 20h ; scratch stack space, released again before the epilogue
2467
2468 fninit ; start with an empty x87 register stack
2469 fld tword [A3] ; ST0 = the 80-bit register value
2470 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; presumably loads FCW and a sanitized FSW from the fxsave image in A0 (macro defined elsewhere)
2471 fstp tword [A2] ; write the value to the 80-bit destination and pop
2472
2473 fnstsw word [A1] ; return the status word resulting from the store
2474
2475 fninit ; leave the x87 unit in its initialized state
2476 add xSP, 20h
2477 EPILOGUE_4_ARGS
2478ENDPROC iemAImpl_fst_r80_to_r80
2479
2480
2481;;
2482; FPU instruction working on two 80-bit floating point values.
2483;
2484; @param 1 The instruction
; @param 2 The operand list for the instruction (may be empty, e.g. {} for fprem).
2485;
2486; @param A0 FPU context (fxsave).
2487; @param A1 Pointer to a IEMFPURESULT for the output.
2488; @param A2 Pointer to the first 80-bit value (ST0)
2489; @param A3 Pointer to the second 80-bit value (STn).
2490;
2491%macro IEMIMPL_FPU_R80_BY_R80 2
; Emits iemAImpl_<instruction>_r80_by_r80. Macro parameter 2 is the operand
; text placed after the mnemonic; it may be empty for implicit-operand forms.
2492BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2493 PROLOGUE_4_ARGS
2494 sub xSP, 20h ; scratch stack space, released again before the epilogue
2495
2496 fninit ; start with an empty x87 register stack
2497 fld tword [A3] ; pushed first, so it ends up as ST1
2498 fld tword [A2] ; pushed last, so it is ST0
2499 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; presumably loads FCW and a sanitized FSW from the fxsave image in A0 (macro defined elsewhere)
2500 %1 %2 ; the instruction under emulation with its operand list
2501
2502 fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word before anything else touches it
2503 fnclex ; drop pending exception flags so the fstp below does not raise them
2504 fstp tword [A1 + IEMFPURESULT.r80Result] ; store ST0 as the result and pop
2505
2506 fninit ; leave the x87 unit in its initialized state
2507 add xSP, 20h
2508 EPILOGUE_4_ARGS
2509ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2510%endmacro
2511
; Explicit-operand arithmetic uses st0, st1; fprem, fprem1 and fscale take no
; explicit operands, hence the empty operand list.
2512IEMIMPL_FPU_R80_BY_R80 fadd, {st0, st1}
2513IEMIMPL_FPU_R80_BY_R80 fmul, {st0, st1}
2514IEMIMPL_FPU_R80_BY_R80 fsub, {st0, st1}
2515IEMIMPL_FPU_R80_BY_R80 fsubr, {st0, st1}
2516IEMIMPL_FPU_R80_BY_R80 fdiv, {st0, st1}
2517IEMIMPL_FPU_R80_BY_R80 fdivr, {st0, st1}
2518IEMIMPL_FPU_R80_BY_R80 fprem, {}
2519IEMIMPL_FPU_R80_BY_R80 fprem1, {}
2520IEMIMPL_FPU_R80_BY_R80 fscale, {}
2521
2522
2523;;
2524; FPU instruction working on two 80-bit floating point values, ST1 and ST0,
2525; storing the result in ST1 and popping the stack.
2526;
2527; @param 1 The instruction
2528;
2529; @param A0 FPU context (fxsave).
2530; @param A1 Pointer to a IEMFPURESULT for the output.
2531; @param A2 Pointer to the first 80-bit value (ST1).
2532; @param A3 Pointer to the second 80-bit value (ST0).
2533;
2534%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
; Emits iemAImpl_<instruction>_r80_by_r80 for instructions that combine ST1 and
; ST0, leave the result in ST1 and pop, so the result is ST0 afterwards.
2535BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2536 PROLOGUE_4_ARGS
2537 sub xSP, 20h ; scratch stack space, released again before the epilogue
2538
2539 fninit ; start with an empty x87 register stack
2540 fld tword [A2] ; pushed first, so it ends up as ST1
2541 fld tword [A3] ; pushed last, so it is ST0
2542 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; presumably loads FCW and a sanitized FSW from the fxsave image in A0 (macro defined elsewhere)
2543 %1 ; the instruction under emulation (implicit ST1/ST0 operands)
2544
2545 fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word before anything else touches it
2546 fnclex ; drop pending exception flags so the fstp below does not raise them
2547 fstp tword [A1 + IEMFPURESULT.r80Result] ; store the result (now in ST0) and pop
2548
2549 fninit ; leave the x87 unit in its initialized state
2550 add xSP, 20h
2551 EPILOGUE_4_ARGS
2552ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2553%endmacro
2554
2555IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
2556IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2x
2557IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2558
2559
2560;;
2561; FPU instruction working on two 80-bit floating point values, only
2562; returning FSW.
2563;
2564; @param 1 The instruction
2565;
2566; @param A0 FPU context (fxsave).
2567; @param A1 Pointer to a uint16_t for the resulting FSW.
2568; @param A2 Pointer to the first 80-bit value.
2569; @param A3 Pointer to the second 80-bit value.
2570;
2571%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
; Emits iemAImpl_<instruction>_r80_by_r80 for compare-style instructions whose
; only output is the status word.
2572BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2573 PROLOGUE_4_ARGS
2574 sub xSP, 20h ; scratch stack space, released again before the epilogue
2575
2576 fninit ; start with an empty x87 register stack
2577 fld tword [A3] ; pushed first, so it ends up as ST1
2578 fld tword [A2] ; pushed last, so it is ST0
2579 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; presumably loads FCW and a sanitized FSW from the fxsave image in A0 (macro defined elsewhere)
2580 %1 st0, st1 ; the instruction under emulation on ST0 and ST1
2581
2582 fnstsw word [A1] ; the resulting status word is the only output
2583
2584 fninit ; leave the x87 unit in its initialized state
2585 add xSP, 20h
2586 EPILOGUE_4_ARGS
2587ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2588%endmacro
2589
2590IEMIMPL_FPU_R80_BY_R80_FSW fcom
2591IEMIMPL_FPU_R80_BY_R80_FSW fucom
2592
2593
2594;;
2595; FPU instruction working on two 80-bit floating point values,
2596; returning FSW and EFLAGS (eax).
2597;
2598; @param 1 The instruction
2599;
2600; @returns EFLAGS in EAX.
2601; @param A0 FPU context (fxsave).
2602; @param A1 Pointer to a uint16_t for the resulting FSW.
2603; @param A2 Pointer to the first 80-bit value.
2604; @param A3 Pointer to the second 80-bit value.
2605;
2606%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
; Emits iemAImpl_<instruction>_r80_by_r80 for compares that report their result
; in EFLAGS: the status word goes to [A1] and the flags are returned in xAX.
2607BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2608 PROLOGUE_4_ARGS
2609 sub xSP, 20h ; scratch stack space, released again before the epilogue
2610
2611 fninit ; start with an empty x87 register stack
2612 fld tword [A3] ; pushed first, so it ends up as ST1
2613 fld tword [A2] ; pushed last, so it is ST0
2614 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; presumably loads FCW and a sanitized FSW from the fxsave image in A0 (macro defined elsewhere)
2615 %1 st1 ; compare ST0 against ST1, setting EFLAGS
2616
2617 fnstsw word [A1] ; store the status word; this does not modify EFLAGS
2618 pushf ; grab the flags before the add below clobbers them ...
2619 pop xAX ; ... and return them in xAX
2620
2621 fninit ; leave the x87 unit in its initialized state
2622 add xSP, 20h
2623 EPILOGUE_4_ARGS
2624ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2625%endmacro
2626
2627IEMIMPL_FPU_R80_BY_R80_EFL fcomi
2628IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2629
2630
2631;;
2632; FPU instruction working on one 80-bit floating point value.
2633;
2634; @param 1 The instruction
2635;
2636; @param A0 FPU context (fxsave).
2637; @param A1 Pointer to a IEMFPURESULT for the output.
2638; @param A2 Pointer to the 80-bit value.
2639;
2640%macro IEMIMPL_FPU_R80 1
; Emits iemAImpl_<instruction>_r80: a unary instruction operating in place on
; ST0, returning FSW and the new ST0.
2641BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2642 PROLOGUE_3_ARGS
2643 sub xSP, 20h ; scratch stack space, released again before the epilogue
2644
2645 fninit ; start with an empty x87 register stack
2646 fld tword [A2] ; ST0 = the 80-bit operand
2647 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; presumably loads FCW and a sanitized FSW from the fxsave image in A0 (macro defined elsewhere)
2648 %1 ; the instruction under emulation (implicit ST0 operand)
2649
2650 fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word before anything else touches it
2651 fnclex ; drop pending exception flags so the fstp below does not raise them
2652 fstp tword [A1 + IEMFPURESULT.r80Result] ; store ST0 as the result and pop
2653
2654 fninit ; leave the x87 unit in its initialized state
2655 add xSP, 20h
2656 EPILOGUE_3_ARGS
2657ENDPROC iemAImpl_ %+ %1 %+ _r80
2658%endmacro
2659
2660IEMIMPL_FPU_R80 fchs
2661IEMIMPL_FPU_R80 fabs
2662IEMIMPL_FPU_R80 f2xm1
2663IEMIMPL_FPU_R80 fsqrt
2664IEMIMPL_FPU_R80 frndint
2665IEMIMPL_FPU_R80 fsin
2666IEMIMPL_FPU_R80 fcos
2667
2668
2669;;
2670; FPU instruction working on one 80-bit floating point value, only
2671; returning FSW.
2672;
2673; @param 1 The instruction
2674;
2675; @param A0 FPU context (fxsave).
2676; @param A1 Pointer to a uint16_t for the resulting FSW.
2677; @param A2 Pointer to the 80-bit value.
2678;
2679%macro IEMIMPL_FPU_R80_FSW 1
; Emits iemAImpl_<instruction>_r80 for instructions that only examine ST0 and
; report through the status word.
2680BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2681 PROLOGUE_3_ARGS
2682 sub xSP, 20h ; scratch stack space, released again before the epilogue
2683
2684 fninit ; start with an empty x87 register stack
2685 fld tword [A2] ; ST0 = the 80-bit operand
2686 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; presumably loads FCW and a sanitized FSW from the fxsave image in A0 (macro defined elsewhere)
2687 %1 ; the instruction under emulation (implicit ST0 operand)
2688
2689 fnstsw word [A1] ; the resulting status word is the only output
2690
2691 fninit ; leave the x87 unit in its initialized state
2692 add xSP, 20h
2693 EPILOGUE_3_ARGS
2694ENDPROC iemAImpl_ %+ %1 %+ _r80
2695%endmacro
2696
2697IEMIMPL_FPU_R80_FSW ftst
2698IEMIMPL_FPU_R80_FSW fxam
2699
2700
2701
2702;;
2703; FPU instruction loading a 80-bit floating point constant.
2704;
2705; @param 1 The instruction
2706;
2707; @param A0 FPU context (fxsave).
2708; @param A1 Pointer to a IEMFPURESULT for the output.
2709;
2710%macro IEMIMPL_FPU_R80_CONST 1
; Emits iemAImpl_<instruction>: pushes one of the built-in x87 constants and
; returns it together with the resulting status word.
2711BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
2712 PROLOGUE_2_ARGS
2713 sub xSP, 20h ; scratch stack space, released again before the epilogue
2714
2715 fninit ; start with an empty x87 register stack
2716 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; presumably loads FCW and a sanitized FSW from the fxsave image in A0 (macro defined elsewhere)
2717 %1 ; the constant-load instruction; pushes the constant as ST0
2718
2719 fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the load
2720 fnclex ; drop pending exception flags so the fstp below does not raise them
2721 fstp tword [A1 + IEMFPURESULT.r80Result] ; store the constant as the result and pop
2722
2723 fninit ; leave the x87 unit in its initialized state
2724 add xSP, 20h
2725 EPILOGUE_2_ARGS
; The name here must match the BEGINPROC_FASTCALL line above; the original had
; a dangling token-paste operator with nothing to paste after the name.
2726ENDPROC iemAImpl_ %+ %1
2727%endmacro
2728
2729IEMIMPL_FPU_R80_CONST fld1
2730IEMIMPL_FPU_R80_CONST fldl2t
2731IEMIMPL_FPU_R80_CONST fldl2e
2732IEMIMPL_FPU_R80_CONST fldpi
2733IEMIMPL_FPU_R80_CONST fldlg2
2734IEMIMPL_FPU_R80_CONST fldln2
2735IEMIMPL_FPU_R80_CONST fldz
2736
2737
2738;;
2739; FPU instruction working on one 80-bit floating point value, outputting two.
2740;
2741; @param 1 The instruction
2742;
2743; @param A0 FPU context (fxsave).
2744; @param A1 Pointer to a IEMFPURESULTTWO for the output.
2745; @param A2 Pointer to the 80-bit value.
2746;
2747%macro IEMIMPL_FPU_R80_R80 1
; Emits iemAImpl_<instruction>_r80_r80 for instructions that replace ST0 and
; push a second value, so two results are popped afterwards.
2748BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
2749 PROLOGUE_3_ARGS
2750 sub xSP, 20h ; scratch stack space, released again before the epilogue
2751
2752 fninit ; start with an empty x87 register stack
2753 fld tword [A2] ; ST0 = the 80-bit operand
2754 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0 ; presumably loads FCW and a sanitized FSW from the fxsave image in A0 (macro defined elsewhere)
2755 %1 ; the instruction under emulation; leaves two values on the stack
2756
2757 fnstsw word [A1 + IEMFPURESULTTWO.FSW] ; capture the status word before anything else touches it
2758 fnclex ; drop pending exception flags so the fstp below does not raise them
2759 fstp tword [A1 + IEMFPURESULTTWO.r80Result2] ; top of stack (pushed last) is the second result
2760 fnclex ; same precaution again before the second store
2761 fstp tword [A1 + IEMFPURESULTTWO.r80Result1] ; the value underneath is the first result
2762
2763 fninit ; leave the x87 unit in its initialized state
2764 add xSP, 20h
2765 EPILOGUE_3_ARGS
2766ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
2767%endmacro
2768
2769IEMIMPL_FPU_R80_R80 fptan
2770IEMIMPL_FPU_R80_R80 fxtract
2771IEMIMPL_FPU_R80_R80 fsincos
2772
2773
2774
2775
2776;---------------------- SSE and MMX Operations ----------------------
2777
2778;; @todo what do we need to do for MMX?
; Hooks placed at the start and end of every MMX worker below. Both are
; currently empty, so they expand to nothing.
2779%macro IEMIMPL_MMX_PROLOGUE 0
2780%endmacro
2781%macro IEMIMPL_MMX_EPILOGUE 0
2782%endmacro
2783
2784;; @todo what do we need to do for SSE?
; Same idea for the SSE workers: empty placeholders for now.
2785%macro IEMIMPL_SSE_PROLOGUE 0
2786%endmacro
2787%macro IEMIMPL_SSE_EPILOGUE 0
2788%endmacro
2789
2790
2791;;
2792; Media instruction working on two full sized registers.
2793;
2794; @param 1 The instruction
2795;
2796; @param A0 FPU context (fxsave).
2797; @param A1 Pointer to the first media register size operand (input/output).
2798; @param A2 Pointer to the second media register size operand (input).
2799;
2800%macro IEMIMPL_MEDIA_F2 1
; Emits two workers for the instruction: a 64-bit MMX one (_u64) and a 128-bit
; SSE one (_u128). Both compute [A1] = instruction([A1], [A2]).
2801BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2802 PROLOGUE_3_ARGS
2803 IEMIMPL_MMX_PROLOGUE
2804
2805 movq mm0, [A1] ; destination operand (also first input)
2806 movq mm1, [A2] ; source operand
2807 %1 mm0, mm1
2808 movq [A1], mm0 ; write the result back in place
2809
2810 IEMIMPL_MMX_EPILOGUE
2811 EPILOGUE_3_ARGS
2812ENDPROC iemAImpl_ %+ %1 %+ _u64
2813
2814BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2815 PROLOGUE_3_ARGS
2816 IEMIMPL_SSE_PROLOGUE
2817
2818 movdqu xmm0, [A1] ; unaligned loads: no alignment requirement is placed on the pointers
2819 movdqu xmm1, [A2]
2820 %1 xmm0, xmm1
2821 movdqu [A1], xmm0 ; write the result back in place
2822
2823 IEMIMPL_SSE_EPILOGUE
2824 EPILOGUE_3_ARGS
2825ENDPROC iemAImpl_ %+ %1 %+ _u128
2826%endmacro
2827
2828IEMIMPL_MEDIA_F2 pxor
2829IEMIMPL_MEDIA_F2 pcmpeqb
2830IEMIMPL_MEDIA_F2 pcmpeqw
2831IEMIMPL_MEDIA_F2 pcmpeqd
2832
2833
2834;;
2835; Media instruction working on one full sized and one half sized register (lower half).
2836;
2837; @param 1 The instruction
2838; @param 2 1 if MMX is included, 0 if not.
2839;
2840; @param A0 FPU context (fxsave).
2841; @param A1 Pointer to the first full sized media register operand (input/output).
2842; @param A2 Pointer to the second half sized media register operand (input).
2843;
2844%macro IEMIMPL_MEDIA_F1L1 2
; Emits the workers for an instruction that only reads the LOW half of its
; source: the source load is half the register width (the upper half of the
; temporary register is zeroed by the load). The MMX worker is only emitted
; when macro parameter 2 is non-zero.
2845 %if %2 != 0
2846BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2847 PROLOGUE_3_ARGS
2848 IEMIMPL_MMX_PROLOGUE
2849
2850 movq mm0, [A1] ; full 64-bit destination operand
2851 movd mm1, [A2] ; only the low 32 bits of the source are read
2852 %1 mm0, mm1
2853 movq [A1], mm0 ; write the result back in place
2854
2855 IEMIMPL_MMX_EPILOGUE
2856 EPILOGUE_3_ARGS
2857ENDPROC iemAImpl_ %+ %1 %+ _u64
2858 %endif
2859
2860BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2861 PROLOGUE_3_ARGS
2862 IEMIMPL_SSE_PROLOGUE
2863
2864 movdqu xmm0, [A1] ; full 128-bit destination operand
2865 movq xmm1, [A2] ; only the low 64 bits of the source are read
2866 %1 xmm0, xmm1
2867 movdqu [A1], xmm0 ; write the result back in place
2868
2869 IEMIMPL_SSE_EPILOGUE
2870 EPILOGUE_3_ARGS
2871ENDPROC iemAImpl_ %+ %1 %+ _u128
2872%endmacro
2873
; punpcklqdq has no MMX form, hence 0 for the second macro argument.
2874IEMIMPL_MEDIA_F1L1 punpcklbw, 1
2875IEMIMPL_MEDIA_F1L1 punpcklwd, 1
2876IEMIMPL_MEDIA_F1L1 punpckldq, 1
2877IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
2878
2879
2880;;
2881; Media instruction working on one full sized and one half sized register (high half).
2882;
2883; @param 1 The instruction
2884; @param 2 1 if MMX is included, 0 if not.
2885;
2886; @param A0 FPU context (fxsave).
2887; @param A1 Pointer to the first full sized media register operand (input/output).
2888; @param A2 Pointer to the second full sized media register operand, where we
2889; will only use the upper half (input).
2890;
2891%macro IEMIMPL_MEDIA_F1H1 2
; Emits the workers for an instruction that reads the HIGH half of its source.
; The source is therefore loaded at full register width, so the upper half is
; actually present in the temporary register. The MMX worker is only emitted
; when macro parameter 2 is non-zero.
2892 %if %2 != 0
2893BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2894 PROLOGUE_3_ARGS
2895 IEMIMPL_MMX_PROLOGUE
2896
2897 movq mm0, [A1] ; full 64-bit destination operand
2898 movq mm1, [A2] ; full 64-bit source; the instruction picks the upper half
2899 %1 mm0, mm1
2900 movq [A1], mm0 ; write the result back in place
2901
2902 IEMIMPL_MMX_EPILOGUE
2903 EPILOGUE_3_ARGS
2904ENDPROC iemAImpl_ %+ %1 %+ _u64
2905 %endif
2906
2907BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2908 PROLOGUE_3_ARGS
2909 IEMIMPL_SSE_PROLOGUE
2910
2911 movdqu xmm0, [A1] ; full 128-bit destination operand
2912 movdqu xmm1, [A2] ; full 128-bit source; the instruction picks the upper half
2913 %1 xmm0, xmm1
2914 movdqu [A1], xmm0 ; write the result back in place
2915
2916 IEMIMPL_SSE_EPILOGUE
2917 EPILOGUE_3_ARGS
2918ENDPROC iemAImpl_ %+ %1 %+ _u128
2919%endmacro
2920
; The punpckh* instructions interleave the HIGH halves of their operands, so the
; whole source must be loaded. Use the F1H1 variant (full-width source load);
; F1L1 loads only the low half with movd/movq, which zeroes the upper half of
; the temporary register and would make these instructions interleave zeros.
; punpckhqdq has no MMX form, hence 0 for the second macro argument.
2921IEMIMPL_MEDIA_F1H1 punpckhbw, 1
2922IEMIMPL_MEDIA_F1H1 punpckhwd, 1
2923IEMIMPL_MEDIA_F1H1 punpckhdq, 1
2924IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
2925
2926
2927;
2928; Shufflers with evil 8-bit immediates.
2929;
2930
2931BEGINPROC_FASTCALL iemAImpl_pshufw, 16
2932 PROLOGUE_4_ARGS
2933 IEMIMPL_MMX_PROLOGUE
2934
2935 movq mm0, [A1] ; destination operand
2936 movq mm1, [A2] ; source operand
; The shuffle immediate is only 8 bits wide, but the index arithmetic below uses
; the full register. Mask it so the computed call target can never fall outside
; the 256-entry table, whatever the caller left in the upper bits.
 and A3, 0ffh
2937 lea T0, [A3 + A3*4] ; sizeof(pshufw+ret) == 5
2938 lea T1, [.imm0 xWrtRIP] ; base of the per-immediate stub table
2939 lea T1, [T1 + T0] ; address of the stub for this immediate
2940 call T1 ; run 'pshufw mm0, mm1, imm' for the requested immediate
2941 movq [A1], mm0 ; write the result back in place
2942
2943 IEMIMPL_MMX_EPILOGUE
2944 EPILOGUE_4_ARGS
; The immediate must be encoded in the instruction, so one 5-byte stub
; (pshufw + ret) is generated for each of the 256 possible values.
2945%assign bImm 0
2946%rep 256
2947.imm %+ bImm:
2948 pshufw mm0, mm1, bImm
2949 ret
2950 %assign bImm bImm + 1
2951%endrep
2952.immEnd: ; 256*5 == 0x500
; Assemble-time size check: each expression equals 0xffff only when the table is
; exactly 0x500 bytes; any other size overflows a word and triggers a warning.
2953dw 0xfaff + (.immEnd - .imm0) ; will cause warning if entries are too big.
2954dw 0x104ff - (.immEnd - .imm0) ; will cause warning if entries are too small.
2955ENDPROC iemAImpl_pshufw
2956
2957
2958%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
; Emits iemAImpl_<instruction> for an SSE shuffle taking an 8-bit immediate. The
; immediate must be encoded in the instruction, so one 6-byte stub
; (shuffle + ret) is generated per possible value and selected by a computed call.
2959BEGINPROC_FASTCALL iemAImpl_ %+ %1, 16
2960 PROLOGUE_4_ARGS
2961 IEMIMPL_SSE_PROLOGUE
2962
2963 movdqu xmm0, [A1] ; destination operand
2964 movdqu xmm1, [A2] ; source operand
; The shuffle immediate is only 8 bits wide, but the index arithmetic below uses
; the full register. Mask it so the computed call target can never fall outside
; the 256-entry table, whatever the caller left in the upper bits.
 and A3, 0ffh
2965 lea T1, [.imm0 xWrtRIP] ; base of the per-immediate stub table
2966 lea T0, [A3 + A3*2] ; sizeof(pshufXX+ret) == 6: (A3 * 3) *2
2967 lea T1, [T1 + T0*2] ; address of the stub for this immediate
2968 call T1 ; run the shuffle for the requested immediate
2969 movdqu [A1], xmm0 ; write the result back in place
2970
2971 IEMIMPL_SSE_EPILOGUE
2972 EPILOGUE_4_ARGS
2973 %assign bImm 0
2974 %rep 256
2975.imm %+ bImm:
2976 %1 xmm0, xmm1, bImm
2977 ret
2978 %assign bImm bImm + 1
2979 %endrep
2980.immEnd: ; 256*6 == 0x600
; Assemble-time size check: each expression equals 0xffff only when the table is
; exactly 0x600 bytes; any other size overflows a word and triggers a warning.
2981dw 0xf9ff + (.immEnd - .imm0) ; will cause warning if entries are too big.
2982dw 0x105ff - (.immEnd - .imm0) ; will cause warning if entries are too small.
2983ENDPROC iemAImpl_ %+ %1
2984%endmacro
2985
2986IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
2987IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
2988IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
2989
2990
2991;
2992; Move byte mask.
2993;
2994
2995BEGINPROC_FASTCALL iemAImpl_pmovmskb_u64, 12
2996 PROLOGUE_3_ARGS
2997 IEMIMPL_MMX_PROLOGUE
2998
; No need to preload T0 from [A1]: pmovmskb overwrites the whole destination
; register (mask in the low bits, everything above zeroed).
3000 movq mm1, [A2] ; the 64-bit source operand
3001 pmovmskb T0, mm1 ; T0 = the most significant bit of each source byte
3002 mov [A1], T0 ; store the mask to the destination
3003%ifdef RT_ARCH_X86
3004 mov dword [A1 + 4], 0 ; 32-bit build: also zero the upper dword of the 64-bit destination (T0 is presumably 32 bits wide here)
3005%endif
3006 IEMIMPL_MMX_EPILOGUE
3007 EPILOGUE_3_ARGS
3008ENDPROC iemAImpl_pmovmskb_u64
3009
3010BEGINPROC_FASTCALL iemAImpl_pmovmskb_u128, 12
3011 PROLOGUE_3_ARGS
3012 IEMIMPL_SSE_PROLOGUE
3013
; No need to preload T0 from [A1]: pmovmskb overwrites the whole destination
; register (mask in the low bits, everything above zeroed).
3015 movdqu xmm1, [A2] ; the 128-bit source operand (unaligned load)
3016 pmovmskb T0, xmm1 ; T0 = the most significant bit of each source byte
3017 mov [A1], T0 ; store the mask to the destination
3018%ifdef RT_ARCH_X86
3019 mov dword [A1 + 4], 0 ; 32-bit build: also zero the upper dword of the 64-bit destination (T0 is presumably 32 bits wide here)
3020%endif
3021 IEMIMPL_SSE_EPILOGUE
3022 EPILOGUE_3_ARGS
3023ENDPROC iemAImpl_pmovmskb_u128
3024
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use