VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 96407

Last change on this file since 96407 was 96407, checked in by vboxsync, 3 years ago

scm copyright and license note update

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 255.7 KB
Line 
1/* $Id: tstIEMAImpl.cpp 96407 2022-08-22 17:43:14Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include "../include/IEMInternal.h"
33
34#include <iprt/errcore.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/ctype.h>
38#include <iprt/getopt.h>
39#include <iprt/initterm.h>
40#include <iprt/message.h>
41#include <iprt/mp.h>
42#include <iprt/rand.h>
43#include <iprt/stream.h>
44#include <iprt/string.h>
45#include <iprt/test.h>
46
47#include "tstIEMAImpl.h"
48
49
50/*********************************************************************************************************************************
51* Defined Constants And Macros *
52*********************************************************************************************************************************/
53#define ENTRY(a_Name) ENTRY_EX(a_Name, 0)
54#define ENTRY_EX(a_Name, a_uExtra) \
55 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
56 g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
57 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
58
59#define ENTRY_BIN(a_Name) ENTRY_EX_BIN(a_Name, 0)
60#define ENTRY_EX_BIN(a_Name, a_uExtra) \
61 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
62 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
63 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
64
65#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
66#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
67 { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
68 g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
69 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }
70
71#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
72#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
73 { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
74 g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
75 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }
76
77#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
78 typedef struct a_TypeName \
79 { \
80 const char *pszName; \
81 a_FunctionPtrType pfn; \
82 a_FunctionPtrType pfnNative; \
83 a_TestType const *paTests; \
84 uint32_t const *pcTests; \
85 uint32_t uExtra; \
86 uint8_t idxCpuEflFlavour; \
87 } a_TypeName
88
89#define COUNT_VARIATIONS(a_SubTest) \
90 (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
91
92
93/*********************************************************************************************************************************
94* Global Variables *
95*********************************************************************************************************************************/
96static RTTEST g_hTest;
97static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
98#ifdef TSTIEMAIMPL_WITH_GENERATOR
99static uint32_t g_cZeroDstTests = 2;
100static uint32_t g_cZeroSrcTests = 4;
101#endif
102static uint8_t *g_pu8, *g_pu8Two;
103static uint16_t *g_pu16, *g_pu16Two;
104static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
105static uint64_t *g_pu64, *g_pu64Two;
106static RTUINT128U *g_pu128, *g_pu128Two;
107
108static char g_aszBuf[32][256];
109static unsigned g_idxBuf = 0;
110
111static uint32_t g_cIncludeTestPatterns;
112static uint32_t g_cExcludeTestPatterns;
113static const char *g_apszIncludeTestPatterns[64];
114static const char *g_apszExcludeTestPatterns[64];
115
116static unsigned g_cVerbosity = 0;
117
118
119/*********************************************************************************************************************************
120* Internal Functions *
121*********************************************************************************************************************************/
122static const char *FormatR80(PCRTFLOAT80U pr80);
123static const char *FormatR64(PCRTFLOAT64U pr64);
124static const char *FormatR32(PCRTFLOAT32U pr32);
125
126
127/*
128 * Random helpers.
129 */
130
131static uint32_t RandEFlags(void)
132{
133 uint32_t fEfl = RTRandU32();
134 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
135}
136
137#ifdef TSTIEMAIMPL_WITH_GENERATOR
138
139static uint8_t RandU8(void)
140{
141 return RTRandU32Ex(0, 0xff);
142}
143
144
145static uint16_t RandU16(void)
146{
147 return RTRandU32Ex(0, 0xffff);
148}
149
150
151static uint32_t RandU32(void)
152{
153 return RTRandU32();
154}
155
156#endif
157
158static uint64_t RandU64(void)
159{
160 return RTRandU64();
161}
162
163
164static RTUINT128U RandU128(void)
165{
166 RTUINT128U Ret;
167 Ret.s.Hi = RTRandU64();
168 Ret.s.Lo = RTRandU64();
169 return Ret;
170}
171
172#ifdef TSTIEMAIMPL_WITH_GENERATOR
173
174static uint8_t RandU8Dst(uint32_t iTest)
175{
176 if (iTest < g_cZeroDstTests)
177 return 0;
178 return RandU8();
179}
180
181
182static uint8_t RandU8Src(uint32_t iTest)
183{
184 if (iTest < g_cZeroSrcTests)
185 return 0;
186 return RandU8();
187}
188
189
190static uint16_t RandU16Dst(uint32_t iTest)
191{
192 if (iTest < g_cZeroDstTests)
193 return 0;
194 return RandU16();
195}
196
197
198static uint16_t RandU16Src(uint32_t iTest)
199{
200 if (iTest < g_cZeroSrcTests)
201 return 0;
202 return RandU16();
203}
204
205
206static uint32_t RandU32Dst(uint32_t iTest)
207{
208 if (iTest < g_cZeroDstTests)
209 return 0;
210 return RandU32();
211}
212
213
214static uint32_t RandU32Src(uint32_t iTest)
215{
216 if (iTest < g_cZeroSrcTests)
217 return 0;
218 return RandU32();
219}
220
221
222static uint64_t RandU64Dst(uint32_t iTest)
223{
224 if (iTest < g_cZeroDstTests)
225 return 0;
226 return RandU64();
227}
228
229
230static uint64_t RandU64Src(uint32_t iTest)
231{
232 if (iTest < g_cZeroSrcTests)
233 return 0;
234 return RandU64();
235}
236
237
238/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
239static int16_t RandI16Src2(uint32_t iTest)
240{
241 if (iTest < 18 * 4)
242 switch (iTest % 4)
243 {
244 case 0: return 0;
245 case 1: return INT16_MAX;
246 case 2: return INT16_MIN;
247 case 3: break;
248 }
249 return (int16_t)RandU16();
250}
251
252
253/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
254static int32_t RandI32Src2(uint32_t iTest)
255{
256 if (iTest < 18 * 4)
257 switch (iTest % 4)
258 {
259 case 0: return 0;
260 case 1: return INT32_MAX;
261 case 2: return INT32_MIN;
262 case 3: break;
263 }
264 return (int32_t)RandU32();
265}
266
267
#if 0
/** Random 64-bit integer source operand.
 * NOTE(review): preprocessed out; no caller visible in this file. */
static int64_t RandI64Src(uint32_t iTest)
{
    RT_NOREF(iTest);
    return (int64_t)RandU64();
}
#endif
275
276
277static uint16_t RandFcw(void)
278{
279 return RandU16() & ~X86_FCW_ZERO_MASK;
280}
281
282
283static uint16_t RandFsw(void)
284{
285 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
286 return RandU16();
287}
288
289
290static uint32_t RandMxcsr(void)
291{
292 return RandU32() & ~X86_MXCSR_ZERO_MASK;
293}
294
295
296static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
297{
298 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
299 pr80->sj64.uFraction >>= cShift;
300 else
301 pr80->sj64.uFraction = (cShift % 19) + 1;
302}
303
304
305
/**
 * Produces an 80-bit floating point test value of a specific basic category.
 *
 * @returns The generated value (the random sign bit always survives).
 * @param   bType       Category selector, masked to 0..31:
 *                      0=zero, 1=pseudo-infinity, 2=infinity, 3=indefinite,
 *                      4/5=denormal, 6/7=pseudo-denormal, 8/9=pseudo-NaN,
 *                      10/11=quiet NaN, 12/13=signalling NaN, 14/15=unnormal,
 *                      16..25=normal (16 forces an all-ones fraction to poke at
 *                      rounding), 26..31 return the raw random bits unmodified.
 * @param   cTarget     Width the caller will convert the value to (80/64/32,
 *                      or 16/32/59/64 when fIntTarget); bounds normal exponents.
 * @param   fIntTarget  Whether the conversion target is an integer format.
 */
static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
{
    Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));

    /* Start from fully random bits, then constrain per category below. */
    RTFLOAT80U r80;
    r80.au64[0] = RandU64();
    r80.au16[4] = RandU16();

    /*
     * Adjust the random stuff according to bType.
     */
    bType &= 0x1f;
    if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
    {
        /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
        r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
        r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
        r80.sj64.fInteger = bType >= 2 ? 1 : 0;
        AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
        Assert( bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
        AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* Denormals (4,5) and Pseudo denormals (6,7); odd types shift the fraction instead of regenerating it. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && bType < 6)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0;
        r80.sj64.fInteger = bType >= 6;
        AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 8 || bType == 9)
    {
        /* Pseudo NaN: max exponent with the integer bit clear. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 0;
        AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
    {
        /* Quiet (10,11) and signalling (12,13) NaNs; bit 62 selects quiet vs signalling. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (bType < 12)
            r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
        r80.sj64.fInteger = 1;
        AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 14 || bType == 15)
    {
        /* Unnormals: non-boundary exponent with the integer bit clear. */
        if (bType & 1)
            SafeR80FractionShift(&r80, RandU8() % 62);
        r80.sj64.fInteger = 0;
        if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
            r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
        AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType < 26)
    {
        /* Make sure we have lots of normalized values. */
        if (!fIntTarget)
        {
            /* Clamp the exponent so it stays representable in the (float) target format. */
            const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
                                   : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
            const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
                                   : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent <= uMinExp)
                r80.sj64.uExponent = uMinExp + 1;
            else if (r80.sj64.uExponent >= uMaxExp)
                r80.sj64.uExponent = uMaxExp - 1;

            if (bType == 16)
            { /* All 1s is useful to testing rounding. Also try trigger special
                 behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp - 1;
                else if ((bExp & 3) == 1)
                    r80.sj64.uExponent = uMinExp + 1;
                else if ((bExp & 3) == 2)
                    r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
            }
        }
        else
        {
            /* integer target: */
            const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
            const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent < uMinExp)
                r80.sj64.uExponent = uMinExp;
            else if (r80.sj64.uExponent > uMaxExp)
                r80.sj64.uExponent = uMaxExp;

            if (bType == 16)
            { /* All 1s is useful to testing rounding. Also try trigger special
                 behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp;
                else if ((bExp & 3) == 1)
                    r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
            }
        }

        AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    /* bType 26..31: the raw random bit pattern is returned unmodified. */
    return r80;
}
444
445
446static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
447{
448 /*
449 * Make it more likely that we get a good selection of special values.
450 */
451 return RandR80Ex(RandU8(), cTarget, fIntTarget);
452
453}
454
455
456static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
457{
458 /* Make sure we cover all the basic types first before going for random selection: */
459 if (iTest <= 18)
460 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
461 return RandR80(cTarget, fIntTarget);
462}
463
464
465/**
466 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
467 * to a 0..17, covering all basic value types.
468 */
469static uint8_t RandR80Src12RemapType(uint8_t bType)
470{
471 switch (bType)
472 {
473 case 0: return 18; /* normal */
474 case 1: return 16; /* normal extreme rounding */
475 case 2: return 14; /* unnormal */
476 case 3: return 12; /* Signalling NaN */
477 case 4: return 10; /* Quiet NaN */
478 case 5: return 8; /* PseudoNaN */
479 case 6: return 6; /* Pseudo Denormal */
480 case 7: return 4; /* Denormal */
481 case 8: return 3; /* Indefinite */
482 case 9: return 2; /* Infinity */
483 case 10: return 1; /* Pseudo-Infinity */
484 case 11: return 0; /* Zero */
485 default: AssertFailedReturn(18);
486 }
487}
488
489
/**
 * This works in tandem with RandR80Src2 to make sure we cover all operand
 * type mixes first before we venture into regular random testing.
 *
 * There are 11 basic variations, when we leave out the five odd ones using
 * SafeR80FractionShift.  Because of the special normalized value targeting at
 * rounding, we make it an even 12.  So 144 combinations for two operands.
 *
 * @param   iTest         Test index; below the per-pairing threshold this picks
 *                        a deterministic type combination, above it random.
 * @param   cPartnerBits  Width of the second operand (80, 64 or 32).
 * @param   fPartnerInt   Whether the second operand is an integer.
 */
static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
{
    if (cPartnerBits == 80)
    {
        Assert(!fPartnerInt);
        if (iTest < 12 * 12) /* 12x12 matrix together with RandR80Src2. */
            return RandR80Ex(RandR80Src12RemapType(iTest / 12));
    }
    else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
    {
        if (iTest < 12 * 10) /* 12x10 matrix together with RandR64Src2/RandR32Src2. */
            return RandR80Ex(RandR80Src12RemapType(iTest / 10));
    }
    else if (iTest < 18 * 4 && fPartnerInt) /* 18x4 matrix together with RandI16Src2/RandI32Src2. */
        return RandR80Ex(iTest / 4);
    return RandR80();
}
515
516
517/** Partner to RandR80Src1. */
518static RTFLOAT80U RandR80Src2(uint32_t iTest)
519{
520 if (iTest < 12 * 12)
521 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
522 return RandR80();
523}
524
525
526static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
527{
528 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
529 pr64->s64.uFraction >>= cShift;
530 else
531 pr64->s64.uFraction = (cShift % 19) + 1;
532}
533
534
/**
 * Produces a 64-bit floating point test value of a specific basic category.
 *
 * @param   bType   Category selector, masked to 0..15: 0=zero, 1=infinity,
 *                  2/3=subnormal, 4/5=quiet NaN, 6/7=signalling NaN,
 *                  8..11=forced normal, 12..15=raw random bits.
 */
static RTFLOAT64U RandR64Ex(uint8_t bType)
{
    RTFLOAT64U r64;
    r64.u = RandU64();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
        r64.s.uFractionHigh = 0;
        r64.s.uFractionLow = 0;
        AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals: ensure a non-zero fraction, zero the exponent. */
        if (bType == 3)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0;
        AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs: the top fraction bit distinguishes quiet from signalling. */
        if (bType & 1)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0x7ff;
        if (bType < 6)
            r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
        else
            r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r64.s.uExponent == 0)
            r64.s.uExponent = 1;
        else if (r64.s.uExponent == 0x7ff)
            r64.s.uExponent = 0x7fe;
        AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    /* bType 12..15: raw random pattern returned unmodified. */
    return r64;
}
591
592
593static RTFLOAT64U RandR64Src(uint32_t iTest)
594{
595 if (iTest < 16)
596 return RandR64Ex(iTest);
597 return RandR64Ex(RandU8());
598}
599
600
601/** Pairing with a 80-bit floating point arg. */
602static RTFLOAT64U RandR64Src2(uint32_t iTest)
603{
604 if (iTest < 12 * 10)
605 return RandR64Ex(9 - iTest % 10); /* start with normal values */
606 return RandR64Ex(RandU8());
607}
608
609
610static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
611{
612 if (pr32->s.uFraction >= RT_BIT_32(cShift))
613 pr32->s.uFraction >>= cShift;
614 else
615 pr32->s.uFraction = (cShift % 19) + 1;
616}
617
618
/**
 * Produces a 32-bit floating point test value of a specific basic category.
 *
 * @param   bType   Category selector, masked to 0..15: 0=zero, 1=infinity,
 *                  2/3=subnormal, 4/5=quiet NaN, 6/7=signalling NaN,
 *                  8..11=forced normal, 12..15=raw random bits.
 */
static RTFLOAT32U RandR32Ex(uint8_t bType)
{
    RTFLOAT32U r32;
    r32.u = RandU32();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r32.s.uExponent = bType == 0 ? 0 : 0xff;
        r32.s.uFraction = 0;
        AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
        AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals: ensure a non-zero fraction, zero the exponent. */
        if (bType == 3)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0;
        AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs: the top fraction bit distinguishes quiet from signalling. */
        if (bType & 1)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0xff;
        if (bType < 6)
            r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
        else
            r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r32.s.uExponent == 0)
            r32.s.uExponent = 1;
        else if (r32.s.uExponent == 0xff)
            r32.s.uExponent = 0xfe;
        AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    /* bType 12..15: raw random pattern returned unmodified. */
    return r32;
}
674
675
676static RTFLOAT32U RandR32Src(uint32_t iTest)
677{
678 if (iTest < 16)
679 return RandR32Ex(iTest);
680 return RandR32Ex(RandU8());
681}
682
683
684/** Pairing with a 80-bit floating point arg. */
685static RTFLOAT32U RandR32Src2(uint32_t iTest)
686{
687 if (iTest < 12 * 10)
688 return RandR32Ex(9 - iTest % 10); /* start with normal values */
689 return RandR32Ex(RandU8());
690}
691
692
/**
 * Produces a packed BCD (x87 FBLD/FBSTP format) test value.
 *
 * Tests 0..2 yield zeros with alternating sign, tests 3..4 the indefinite
 * encoding.  Thereafter (iTest & 7) >= 6 produces deliberately illegal
 * encodings (random digit bytes, and non-zero padding when (iTest & 7) == 7),
 * the rest valid BCD digit pairs.
 */
static RTPBCD80U RandD80Src(uint32_t iTest)
{
    if (iTest < 3)
    {
        RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
        return d80Zero;
    }
    if (iTest < 5)
    {
        RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
        return d80Ind;
    }

    RTPBCD80U d80;
    uint8_t b = RandU8();
    d80.s.fSign = b & 1;

    if ((iTest & 7) >= 6)
    {
        /* Illegal: raw random bytes need not be valid BCD digit pairs. */
        d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
            d80.s.abPairs[iPair] = RandU8();
    }
    else
    {
        /* Normal: each nibble is a decimal digit 0..9. */
        d80.s.uPad = 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
        {
            uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
            uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
            d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
        }
    }
    return d80;
}
730
731
/**
 * Formats an 80-bit float as C source (an RTFLOAT80U_INIT_* expression).
 *
 * @returns A string literal for common special values, otherwise a pointer
 *          into the rotating g_aszBuf pool (valid until the pool wraps).
 */
const char *GenFormatR80(PCRTFLOAT80U plrd)
{
    if (RTFLOAT80U_IS_ZERO(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
    if (RTFLOAT80U_IS_INF(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
    if (RTFLOAT80U_IS_INDEFINITE(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
    /* NOTE(review): the low-bits==1 payload check appears to match the default
       payload of the INIT_QNAN/INIT_SNAN macros - confirm against iprt/types.h. */
    if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
    if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
                plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
    return pszBuf;
}
750
751const char *GenFormatR64(PCRTFLOAT64U prd)
752{
753 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
754 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
755 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
756 return pszBuf;
757}
758
759
760const char *GenFormatR32(PCRTFLOAT32U pr)
761{
762 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
763 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
764 return pszBuf;
765}
766
767
/**
 * Formats a packed BCD value as a RTPBCD80U_INIT_C()/RTPBCD80U_INIT_EX_C()
 * source expression, returning a pointer into the rotating g_aszBuf pool.
 * The EX variant is used when the (normally zero) padding bits are set.
 */
const char *GenFormatD80(PCRTPBCD80U pd80)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off;
    if (pd80->s.uPad == 0)
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
    else
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
    /* Digit pairs are emitted most significant first. */
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
                           RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
                           RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
    /* Worst case is well below the 256 byte buffer, so no bounds check here. */
    pszBuf[off++] = ')';
    pszBuf[off++] = '\0';
    return pszBuf;
}
785
786
787const char *GenFormatI64(int64_t i64)
788{
789 if (i64 == INT64_MIN) /* This one is problematic */
790 return "INT64_MIN";
791 if (i64 == INT64_MAX)
792 return "INT64_MAX";
793 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
794 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
795 return pszBuf;
796}
797
798
799const char *GenFormatI64(int64_t const *pi64)
800{
801 return GenFormatI64(*pi64);
802}
803
804
805const char *GenFormatI32(int32_t i32)
806{
807 if (i32 == INT32_MIN) /* This one is problematic */
808 return "INT32_MIN";
809 if (i32 == INT32_MAX)
810 return "INT32_MAX";
811 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
812 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
813 return pszBuf;
814}
815
816
817const char *GenFormatI32(int32_t const *pi32)
818{
819 return GenFormatI32(*pi32);
820}
821
822
823const char *GenFormatI16(int16_t i16)
824{
825 if (i16 == INT16_MIN) /* This one is problematic */
826 return "INT16_MIN";
827 if (i16 == INT16_MAX)
828 return "INT16_MAX";
829 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
830 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
831 return pszBuf;
832}
833
834
835const char *GenFormatI16(int16_t const *pi16)
836{
837 return GenFormatI16(*pi16);
838}
839
840
/**
 * Writes the standard file header comment for a generated test data file.
 *
 * @param   pOut        Output stream.
 * @param   pszCpuDesc  CPU description embedded in the file docstring.
 * @param   pszCpuType  Optional CPU type tag (NULL to omit).
 */
static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
{
    /* We want to tag the generated source code with the revision that produced it. */
    static char s_szRev[] = "$Revision: 96407 $";
    const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
    size_t cchRev = 0;
    while (RT_C_IS_DIGIT(pszRev[cchRev])) /* take only the leading digits of the expanded keyword */
        cchRev++;

    RTStrmPrintf(pOut,
                 "/* $Id: tstIEMAImpl.cpp 96407 2022-08-22 17:43:14Z vboxsync $ */\n"
                 "/** @file\n"
                 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
                 " */\n"
                 "\n"
                 "/*\n"
                 " * Copyright (C) 2022 Oracle Corporation\n"
                 " *\n"
                 " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
                 " * available from http://www.virtualbox.org. This file is free software;\n"
                 " * you can redistribute it and/or modify it under the terms of the GNU\n"
                 " * General Public License (GPL) as published by the Free Software\n"
                 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
                 " * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
                 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
                 " */\n"
                 "\n"
                 "#include \"tstIEMAImpl.h\"\n"
                 "\n"
                 ,
                 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
}
873
874
875static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
876{
877 PRTSTREAM pOut = NULL;
878 int rc = RTStrmOpen(pszFilename, "w", &pOut);
879 if (RT_SUCCESS(rc))
880 {
881 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
882 return pOut;
883 }
884 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
885 return NULL;
886}
887
888
889static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
890{
891 RTStrmPrintf(pOut,
892 "\n"
893 "/* end of file */\n");
894 int rc = RTStrmClose(pOut);
895 if (RT_SUCCESS(rc))
896 return rcExit;
897 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
898}
899
900
901static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
902{
903 RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
904}
905
906
907static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
908{
909 RTStrmPrintf(pOut,
910 "};\n"
911 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
912 "\n",
913 pszName, pszName);
914}
915
916#endif /* TSTIEMAIMPL_WITH_GENERATOR */
917
918
919/*
920 * Test helpers.
921 */
922static bool IsTestEnabled(const char *pszName)
923{
924 /* Process excludes first: */
925 uint32_t i = g_cExcludeTestPatterns;
926 while (i-- > 0)
927 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
928 return false;
929
930 /* If no include patterns, everything is included: */
931 i = g_cIncludeTestPatterns;
932 if (!i)
933 return true;
934
935 /* Otherwise only tests in the include patters gets tested: */
936 while (i-- > 0)
937 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
938 return true;
939
940 return false;
941}
942
943
944static bool SubTestAndCheckIfEnabled(const char *pszName)
945{
946 RTTestSub(g_hTest, pszName);
947 if (IsTestEnabled(pszName))
948 return true;
949 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
950 return false;
951}
952
953
954static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
955{
956 if (fActual == fExpected)
957 return "";
958
959 uint32_t const fXor = fActual ^ fExpected;
960 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
961 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
962
963 static struct
964 {
965 const char *pszName;
966 uint32_t fFlag;
967 } const s_aFlags[] =
968 {
969#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
970 EFL_ENTRY(CF),
971 EFL_ENTRY(PF),
972 EFL_ENTRY(AF),
973 EFL_ENTRY(ZF),
974 EFL_ENTRY(SF),
975 EFL_ENTRY(TF),
976 EFL_ENTRY(IF),
977 EFL_ENTRY(DF),
978 EFL_ENTRY(OF),
979 EFL_ENTRY(IOPL),
980 EFL_ENTRY(NT),
981 EFL_ENTRY(RF),
982 EFL_ENTRY(VM),
983 EFL_ENTRY(AC),
984 EFL_ENTRY(VIF),
985 EFL_ENTRY(VIP),
986 EFL_ENTRY(ID),
987 };
988 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
989 if (s_aFlags[i].fFlag & fXor)
990 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
991 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
992 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
993 return pszBuf;
994}
995
996
997static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
998{
999 if (fActual == fExpected)
1000 return "";
1001
1002 uint16_t const fXor = fActual ^ fExpected;
1003 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1004 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1005
1006 static struct
1007 {
1008 const char *pszName;
1009 uint32_t fFlag;
1010 } const s_aFlags[] =
1011 {
1012#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1013 FSW_ENTRY(IE),
1014 FSW_ENTRY(DE),
1015 FSW_ENTRY(ZE),
1016 FSW_ENTRY(OE),
1017 FSW_ENTRY(UE),
1018 FSW_ENTRY(PE),
1019 FSW_ENTRY(SF),
1020 FSW_ENTRY(ES),
1021 FSW_ENTRY(C0),
1022 FSW_ENTRY(C1),
1023 FSW_ENTRY(C2),
1024 FSW_ENTRY(C3),
1025 FSW_ENTRY(B),
1026 };
1027 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1028 if (s_aFlags[i].fFlag & fXor)
1029 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1030 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1031 if (fXor & X86_FSW_TOP_MASK)
1032 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1033 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1034#if 0 /* For debugging fprem & fprem1 */
1035 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1036 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1037#endif
1038 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1039 return pszBuf;
1040}
1041
1042
1043static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1044{
1045 if (fActual == fExpected)
1046 return "";
1047
1048 uint16_t const fXor = fActual ^ fExpected;
1049 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1050 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1051
1052 static struct
1053 {
1054 const char *pszName;
1055 uint32_t fFlag;
1056 } const s_aFlags[] =
1057 {
1058#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1059 MXCSR_ENTRY(IE),
1060 MXCSR_ENTRY(DE),
1061 MXCSR_ENTRY(ZE),
1062 MXCSR_ENTRY(OE),
1063 MXCSR_ENTRY(UE),
1064 MXCSR_ENTRY(PE),
1065
1066 MXCSR_ENTRY(IM),
1067 MXCSR_ENTRY(DM),
1068 MXCSR_ENTRY(ZM),
1069 MXCSR_ENTRY(OM),
1070 MXCSR_ENTRY(UM),
1071 MXCSR_ENTRY(PM),
1072
1073 MXCSR_ENTRY(DAZ),
1074 MXCSR_ENTRY(FZ),
1075#undef MXCSR_ENTRY
1076 };
1077 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1078 if (s_aFlags[i].fFlag & fXor)
1079 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1080 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1081 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1082 return pszBuf;
1083}
1084
1085
1086static const char *FormatFcw(uint16_t fFcw)
1087{
1088 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1089
1090 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1091 switch (fFcw & X86_FCW_PC_MASK)
1092 {
1093 case X86_FCW_PC_24: pszPC = "PC24"; break;
1094 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1095 case X86_FCW_PC_53: pszPC = "PC53"; break;
1096 case X86_FCW_PC_64: pszPC = "PC64"; break;
1097 }
1098
1099 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1100 switch (fFcw & X86_FCW_RC_MASK)
1101 {
1102 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1103 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1104 case X86_FCW_RC_UP: pszRC = "UP"; break;
1105 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1106 }
1107 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1108
1109 static struct
1110 {
1111 const char *pszName;
1112 uint32_t fFlag;
1113 } const s_aFlags[] =
1114 {
1115#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1116 FCW_ENTRY(IM),
1117 FCW_ENTRY(DM),
1118 FCW_ENTRY(ZM),
1119 FCW_ENTRY(OM),
1120 FCW_ENTRY(UM),
1121 FCW_ENTRY(PM),
1122 { "6M", 64 },
1123 };
1124 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1125 if (fFcw & s_aFlags[i].fFlag)
1126 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1127
1128 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1129 return pszBuf;
1130}
1131
1132
1133static const char *FormatMxcsr(uint32_t fMxcsr)
1134{
1135 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1136
1137 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1138 switch (fMxcsr & X86_MXCSR_RC_MASK)
1139 {
1140 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1141 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1142 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1143 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1144 }
1145
1146 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1147 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1148 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1149
1150 static struct
1151 {
1152 const char *pszName;
1153 uint32_t fFlag;
1154 } const s_aFlags[] =
1155 {
1156#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1157 MXCSR_ENTRY(IE),
1158 MXCSR_ENTRY(DE),
1159 MXCSR_ENTRY(ZE),
1160 MXCSR_ENTRY(OE),
1161 MXCSR_ENTRY(UE),
1162 MXCSR_ENTRY(PE),
1163
1164 MXCSR_ENTRY(IM),
1165 MXCSR_ENTRY(DM),
1166 MXCSR_ENTRY(ZM),
1167 MXCSR_ENTRY(OM),
1168 MXCSR_ENTRY(UM),
1169 MXCSR_ENTRY(PM),
1170 { "6M", 64 },
1171 };
1172 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1173 if (fMxcsr & s_aFlags[i].fFlag)
1174 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1175
1176 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1177 return pszBuf;
1178}
1179
1180
1181static const char *FormatR80(PCRTFLOAT80U pr80)
1182{
1183 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1184 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1185 return pszBuf;
1186}
1187
1188
1189static const char *FormatR64(PCRTFLOAT64U pr64)
1190{
1191 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1192 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1193 return pszBuf;
1194}
1195
1196
1197static const char *FormatR32(PCRTFLOAT32U pr32)
1198{
1199 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1200 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1201 return pszBuf;
1202}
1203
1204
1205static const char *FormatD80(PCRTPBCD80U pd80)
1206{
1207 /* There is only one indefinite endcoding (same as for 80-bit
1208 floating point), so get it out of the way first: */
1209 if (RTPBCD80U_IS_INDEFINITE(pd80))
1210 return "Ind";
1211
1212 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1213 size_t off = 0;
1214 pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1215 unsigned cBadDigits = 0;
1216 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1217 while (iPair-- > 0)
1218 {
1219 static const char s_szDigits[] = "0123456789abcdef";
1220 static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1221 pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1222 pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1223 cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1224 + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1225 }
1226 if (cBadDigits || pd80->s.uPad != 0)
1227 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1228 pszBuf[off] = '\0';
1229 return pszBuf;
1230}
1231
1232
#if 0
/** Formats a signed 64-bit value as hex into a rotating static buffer.
 *  Currently unused, hence compiled out (see FormatI32/FormatI16 below). */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
1241
1242
1243static const char *FormatI32(int32_t const *piVal)
1244{
1245 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1246 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1247 return pszBuf;
1248}
1249
1250
1251static const char *FormatI16(int16_t const *piVal)
1252{
1253 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1254 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1255 return pszBuf;
1256}
1257
1258
/*
 * Binary operations.
 *
 * Each sub-test table entry pairs a worker function with pre-generated test
 * vectors (input/output EFLAGS, destination and source operands).
 */
TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);

#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits BinU<a_cBits>Generate(), which runs the native (or fallback) worker
 * on random inputs and streams the resulting test vectors as C array
 * initializers.  CPU-vendor-specific EFLAGS flavours go to pOutCpu; the
 * rest to pOut.  Skipped entirely when the generator build is disabled.
 */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc = 0; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1305
/**
 * Emits BinU<a_cBits>Test() (plus its generator via GEN_BINARY_TESTS).
 *
 * The test driver replays each pre-generated vector against the worker and
 * compares the resulting destination and EFLAGS; on success it repeats the
 * call via the global g_puNN / g_pfEfl pointers as a second sanity check.
 * A second loop iteration (cVars) re-runs everything with pfnNative.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1346
1347
/*
 * 8-bit binary operations.
 */
/** Sub-test table for the 8-bit two-operand workers (plain and locked
 *  variants share the same pre-generated test vectors). */
static const BINU8_T g_aBinU8[] =
{
    ENTRY(add_u8),
    ENTRY(add_u8_locked),
    ENTRY(adc_u8),
    ENTRY(adc_u8_locked),
    ENTRY(sub_u8),
    ENTRY(sub_u8_locked),
    ENTRY(sbb_u8),
    ENTRY(sbb_u8_locked),
    ENTRY(or_u8),
    ENTRY(or_u8_locked),
    ENTRY(xor_u8),
    ENTRY(xor_u8_locked),
    ENTRY(and_u8),
    ENTRY(and_u8_locked),
    ENTRY(cmp_u8),
    ENTRY(test_u8),
};
TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1371
1372
/*
 * 16-bit binary operations.
 */
/** Sub-test table for the 16-bit two-operand workers.  The ENTRY_EX uExtra=1
 *  marks bit-test instructions whose source (bit index) must be masked to
 *  the operand width by the generator; ENTRY_AMD/ENTRY_INTEL carry the
 *  EFLAGS bits whose behaviour differs between CPU vendors. */
static const BINU16_T g_aBinU16[] =
{
    ENTRY(add_u16),
    ENTRY(add_u16_locked),
    ENTRY(adc_u16),
    ENTRY(adc_u16_locked),
    ENTRY(sub_u16),
    ENTRY(sub_u16_locked),
    ENTRY(sbb_u16),
    ENTRY(sbb_u16_locked),
    ENTRY(or_u16),
    ENTRY(or_u16_locked),
    ENTRY(xor_u16),
    ENTRY(xor_u16_locked),
    ENTRY(and_u16),
    ENTRY(and_u16_locked),
    ENTRY(cmp_u16),
    ENTRY(test_u16),
    ENTRY_EX(bt_u16, 1),
    ENTRY_EX(btc_u16, 1),
    ENTRY_EX(btc_u16_locked, 1),
    ENTRY_EX(btr_u16, 1),
    ENTRY_EX(btr_u16_locked, 1),
    ENTRY_EX(bts_u16, 1),
    ENTRY_EX(bts_u16_locked, 1),
    ENTRY_AMD( bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY(arpl),
};
TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1410
1411
/*
 * 32-bit binary operations.
 */
/** Sub-test table for the 32-bit two-operand workers; same layout and
 *  conventions as g_aBinU16 above. */
static const BINU32_T g_aBinU32[] =
{
    ENTRY(add_u32),
    ENTRY(add_u32_locked),
    ENTRY(adc_u32),
    ENTRY(adc_u32_locked),
    ENTRY(sub_u32),
    ENTRY(sub_u32_locked),
    ENTRY(sbb_u32),
    ENTRY(sbb_u32_locked),
    ENTRY(or_u32),
    ENTRY(or_u32_locked),
    ENTRY(xor_u32),
    ENTRY(xor_u32_locked),
    ENTRY(and_u32),
    ENTRY(and_u32_locked),
    ENTRY(cmp_u32),
    ENTRY(test_u32),
    ENTRY_EX(bt_u32, 1),
    ENTRY_EX(btc_u32, 1),
    ENTRY_EX(btc_u32_locked, 1),
    ENTRY_EX(btr_u32, 1),
    ENTRY_EX(btr_u32_locked, 1),
    ENTRY_EX(bts_u32, 1),
    ENTRY_EX(bts_u32_locked, 1),
    ENTRY_AMD( bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1448
1449
/*
 * 64-bit binary operations.
 */
/** Sub-test table for the 64-bit two-operand workers; same layout and
 *  conventions as g_aBinU16 above. */
static const BINU64_T g_aBinU64[] =
{
    ENTRY(add_u64),
    ENTRY(add_u64_locked),
    ENTRY(adc_u64),
    ENTRY(adc_u64_locked),
    ENTRY(sub_u64),
    ENTRY(sub_u64_locked),
    ENTRY(sbb_u64),
    ENTRY(sbb_u64_locked),
    ENTRY(or_u64),
    ENTRY(or_u64_locked),
    ENTRY(xor_u64),
    ENTRY(xor_u64_locked),
    ENTRY(and_u64),
    ENTRY(and_u64_locked),
    ENTRY(cmp_u64),
    ENTRY(test_u64),
    ENTRY_EX(bt_u64, 1),
    ENTRY_EX(btc_u64, 1),
    ENTRY_EX(btc_u64_locked, 1),
    ENTRY_EX(btr_u64, 1),
    ENTRY_EX(btr_u64_locked, 1),
    ENTRY_EX(bts_u64, 1),
    ENTRY_EX(bts_u64_locked, 1),
    ENTRY_AMD( bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1486
1487
/*
 * XCHG
 */
/**
 * Tests the xchg memory/register workers (locked and unlocked, all widths).
 *
 * For each worker: fill a local 64-bit "memory" and "register" value with
 * distinct random data (restricted to the operand width via fMask), run the
 * worker, and verify the two values were swapped.
 */
static void XchgTest(void)
{
    if (!SubTestAndCheckIfEnabled("xchg"))
        return;
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));

    /* Worker table: operand size, width mask, and the worker via a
       function-pointer union (stored through the uintptr_t member so one
       initializer form fits all widths). */
    static struct
    {
        uint8_t cb; uint64_t fMask;
        union
        {
            uintptr_t pfn;
            FNIEMAIMPLXCHGU8 *pfnU8;
            FNIEMAIMPLXCHGU16 *pfnU16;
            FNIEMAIMPLXCHGU32 *pfnU32;
            FNIEMAIMPLXCHGU64 *pfnU64;
        } u;
    }
    s_aXchgWorkers[] =
    {
        { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
        { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
    {
        RTUINT64U uIn1, uIn2, uMem, uDst;
        uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        if (uIn1.u == uIn2.u)
            uDst.u = uIn2.u = ~uIn2.u;  /* ensure the two values differ so a no-op swap is detectable */

        /* NOTE(review): the first call in each case operates on the global
           g_puNN/g_puNNTwo pointers and its result is not checked -
           presumably just to exercise the worker on that specially allocated
           memory; confirm before removing. */
        switch (s_aXchgWorkers[i].cb)
        {
            case 1:
                s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
                s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
                break;
            case 2:
                s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
                s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
                break;
            case 4:
                s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
                s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
                break;
            case 8:
                s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
                s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
                break;
            default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
        }

        /* After the swap the memory operand must hold the register input and vice versa. */
        if (uMem.u != uIn2.u || uDst.u != uIn1.u)
            RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
    }
}
1556
1557
1558/*
1559 * XADD
1560 */
1561static void XaddTest(void)
1562{
1563#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1564 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1565 static struct \
1566 { \
1567 const char *pszName; \
1568 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1569 BINU ## a_cBits ## _TEST_T const *paTests; \
1570 uint32_t const *pcTests; \
1571 } const s_aFuncs[] = \
1572 { \
1573 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1574 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1575 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1576 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1577 }; \
1578 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1579 { \
1580 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1581 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1582 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1583 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1584 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1585 { \
1586 uint32_t fEfl = paTests[iTest].fEflIn; \
1587 a_Type uSrc = paTests[iTest].uSrcIn; \
1588 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1589 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1590 if ( fEfl != paTests[iTest].fEflOut \
1591 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1592 || uSrc != paTests[iTest].uDstIn) \
1593 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1594 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1595 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1596 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1597 } \
1598 } \
1599 } while(0)
1600 TEST_XADD(8, uint8_t, "%#04x");
1601 TEST_XADD(16, uint16_t, "%#06x");
1602 TEST_XADD(32, uint32_t, "%#010RX32");
1603 TEST_XADD(64, uint64_t, "%#010RX64");
1604}
1605
1606
/*
 * CMPXCHG
 */

/**
 * Tests the cmpxchg workers (plain and locked) for all operand sizes,
 * reusing the 'cmp' test vectors.
 *
 * Each vector is run twice: once as-is (comparand almost certainly differs
 * from memory, so no exchange; EFLAGS must match the recorded cmp result),
 * and once with comparand == memory (exchange happens; the expected EFLAGS
 * are computed on the fly with the matching sub worker).
 *
 * NOTE(review): both failure messages use the "#%ua" label; the second
 * (positive case) was presumably meant to say "#%ub" - cosmetic only.
 */
static void CmpXchgTest(void)
{
#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
        typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
        static struct \
        { \
            const char *pszName; \
            FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
            PFNIEMAIMPLBINU ## a_cBits pfnSub; \
            BINU ## a_cBits ## _TEST_T const *paTests; \
            uint32_t const *pcTests; \
        } const s_aFuncs[] = \
        { \
            { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
              g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
            { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
              g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
        }; \
        for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
        { \
            if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
            BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
            uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
            if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                /* as is (99% likely to be negative). */ \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
                a_Type uA = paTests[iTest].uDstIn; \
                *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
                a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
                s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
                if ( fEfl != paTests[iTest].fEflOut \
                    || *g_pu ## a_cBits != uExpect \
                    || uA != paTests[iTest].uSrcIn) \
                    RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                                 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
                                 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
                /* positive */ \
                uint32_t fEflExpect = paTests[iTest].fEflIn; \
                uA = paTests[iTest].uDstIn; \
                s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
                fEfl = paTests[iTest].fEflIn; \
                uA = paTests[iTest].uDstIn; \
                *g_pu ## a_cBits = uA; \
                s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
                if ( fEfl != fEflExpect \
                    || *g_pu ## a_cBits != uNew \
                    || uA != paTests[iTest].uDstIn) \
                    RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                                 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
                                 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
                                 EFlagsDiff(fEfl, fEflExpect)); \
            } \
        } \
    } while(0)
    TEST_CMPXCHG(8, uint8_t, "%#04RX8");
    TEST_CMPXCHG(16, uint16_t, "%#06x");
    TEST_CMPXCHG(32, uint32_t, "%#010RX32");
#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
    TEST_CMPXCHG(64, uint64_t, "%#010RX64");
#endif
}
1676
/**
 * Tests the cmpxchg8b workers (plain and locked).
 *
 * For each of a few random value pairs it runs a positive case (comparand
 * matches memory: memory gets the new value, ZF set) and a negative case
 * (comparand differs: memory unchanged, old value returned, ZF clear).
 */
static void CmpXchg8bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG8B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg8b", iemAImpl_cmpxchg8b },
        { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            uint64_t const uOldValue = RandU64();
            uint64_t const uNewValue = RandU64();

            /* positive test. */
            RTUINT64U uA, uB;
            uB.u = uNewValue;
            uA.u = uOldValue;
            *g_pu64 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            /* Expect: memory updated, comparand register unchanged, ZF set. */
            if ( fEfl != (fEflIn | X86_EFL_ZF)
                || *g_pu64 != uNewValue
                || uA.u != uOldValue)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest, fEflIn, uOldValue, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);

            /* negative */
            uint64_t const uExpect = ~uOldValue;
            *g_pu64 = uExpect;
            uA.u = uOldValue;
            uB.u = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            /* Expect: memory untouched, current memory value returned in uA, ZF clear. */
            if ( fEfl != (fEflIn & ~X86_EFL_ZF)
                || *g_pu64 != uExpect
                || uA.u != uExpect)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);
        }
    }
}
1733
/**
 * Tests the cmpxchg16b workers (plain, locked and, on non-ARM64 hosts, the
 * C fallback) - the 128-bit analogue of CmpXchg8bTest above.
 *
 * On AMD64 the hardware variants are skipped when the host CPU lacks the
 * CMPXCHG16B feature bit.
 */
static void CmpXchg16bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG16B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg16b", iemAImpl_cmpxchg16b },
        { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
#if !defined(RT_ARCH_ARM64)
        { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
#endif
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
        if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
        {
            RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
            continue;
        }
#endif
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            RTUINT128U const uOldValue = RandU128();
            RTUINT128U const uNewValue = RandU128();

            /* positive test. */
            RTUINT128U uA, uB;
            uB = uNewValue;
            uA = uOldValue;
            *g_pu128 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            /* Expect: memory updated, comparand unchanged, ZF set. */
            if ( fEfl != (fEflIn | X86_EFL_ZF)
                || g_pu128->s.Lo != uNewValue.s.Lo
                || g_pu128->s.Hi != uNewValue.s.Hi
                || uA.s.Lo != uOldValue.s.Lo
                || uA.s.Hi != uOldValue.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
                             EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);

            /* negative */
            RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
            *g_pu128 = uExpect;
            uA = uOldValue;
            uB = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            /* Expect: memory untouched, current memory value returned in uA, ZF clear. */
            if ( fEfl != (fEflIn & ~X86_EFL_ZF)
                || g_pu128->s.Lo != uExpect.s.Lo
                || g_pu128->s.Hi != uExpect.s.Hi
                || uA.s.Lo != uExpect.s.Lo
                || uA.s.Hi != uExpect.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
                             EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
        }
    }
}
1810
1811
/*
 * Double shifts (shld/shrd).
 *
 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits ShiftDblU<a_cBits>Generate(), which runs the native worker on random
 * inputs (shift counts deliberately up to 4x the operand width to exercise
 * the count masking) and streams the vectors as C array initializers.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1846
/**
 * Emits the shld/shrd sub-test table, the generator (via GEN_SHIFT_DBL) and
 * the ShiftDblU<a_cBits>Test() driver for one operand size.
 *
 * The driver replays each vector (shift count in uMisc), compares dst and
 * EFLAGS, and on success re-runs the call through the global g_puNN/g_pfEfl
 * pointers; a second cVars iteration switches to the native worker.
 */
#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
\
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
}; \
\
GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftDblU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
                                 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1900
1901#ifdef TSTIEMAIMPL_WITH_GENERATOR
1902static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
1903{
1904 ShiftDblU16Generate(pOut, cTests);
1905 ShiftDblU32Generate(pOut, cTests);
1906 ShiftDblU64Generate(pOut, cTests);
1907}
1908#endif
1909
1910static void ShiftDblTest(void)
1911{
1912 ShiftDblU16Test();
1913 ShiftDblU32Test();
1914 ShiftDblU64Test();
1915}
1916
1917
1918/*
1919 * Unary operators.
1920 *
 * Note! We use BINUxx_TEST_T, ignoring uSrcIn and uMisc.
1922 */
1923#ifdef TSTIEMAIMPL_WITH_GENERATOR
1924# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1925void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
1926{ \
1927 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1928 { \
1929 GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[iFn].pszName, #a_TestType); \
1930 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1931 { \
1932 a_TestType Test; \
1933 Test.fEflIn = RandEFlags(); \
1934 Test.fEflOut = Test.fEflIn; \
1935 Test.uDstIn = RandU ## a_cBits(); \
1936 Test.uDstOut = Test.uDstIn; \
1937 Test.uSrcIn = 0; \
1938 Test.uMisc = 0; \
1939 g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
1940 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
1941 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
1942 } \
1943 GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[iFn].pszName); \
1944 } \
1945}
1946#else
1947# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
1948#endif
1949
1950#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1951TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
1952static a_SubTestType const g_aUnaryU ## a_cBits [] = \
1953{ \
1954 ENTRY(inc_u ## a_cBits), \
1955 ENTRY(inc_u ## a_cBits ## _locked), \
1956 ENTRY(dec_u ## a_cBits), \
1957 ENTRY(dec_u ## a_cBits ## _locked), \
1958 ENTRY(not_u ## a_cBits), \
1959 ENTRY(not_u ## a_cBits ## _locked), \
1960 ENTRY(neg_u ## a_cBits), \
1961 ENTRY(neg_u ## a_cBits ## _locked), \
1962}; \
1963\
1964GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1965\
1966static void UnaryU ## a_cBits ## Test(void) \
1967{ \
1968 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1969 { \
1970 if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
1971 a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
1972 uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
1973 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1974 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1975 { \
1976 uint32_t fEfl = paTests[iTest].fEflIn; \
1977 a_Type uDst = paTests[iTest].uDstIn; \
1978 g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
1979 if ( uDst != paTests[iTest].uDstOut \
1980 || fEfl != paTests[iTest].fEflOut) \
1981 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
1982 iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
1983 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1984 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1985 else \
1986 { \
1987 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1988 *g_pfEfl = paTests[iTest].fEflIn; \
1989 g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
1990 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1991 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1992 } \
1993 } \
1994 } \
1995}
1996TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
1997TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
1998TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
1999TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
2000
2001#ifdef TSTIEMAIMPL_WITH_GENERATOR
2002static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
2003{
2004 UnaryU8Generate(pOut, cTests);
2005 UnaryU16Generate(pOut, cTests);
2006 UnaryU32Generate(pOut, cTests);
2007 UnaryU64Generate(pOut, cTests);
2008}
2009#endif
2010
2011static void UnaryTest(void)
2012{
2013 UnaryU8Test();
2014 UnaryU16Test();
2015 UnaryU32Test();
2016 UnaryU64Test();
2017}
2018
2019
2020/*
2021 * Shifts.
2022 *
2023 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2024 */
2025#ifdef TSTIEMAIMPL_WITH_GENERATOR
2026# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2027void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2028{ \
2029 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2030 { \
2031 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2032 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2033 continue; \
2034 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2035 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2036 { \
2037 a_TestType Test; \
2038 Test.fEflIn = RandEFlags(); \
2039 Test.fEflOut = Test.fEflIn; \
2040 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
2041 Test.uDstOut = Test.uDstIn; \
2042 Test.uSrcIn = 0; \
2043 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
2044 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2045 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
2046 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
2047 \
2048 Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
2049 Test.fEflOut = Test.fEflIn; \
2050 Test.uDstOut = Test.uDstIn; \
2051 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2052 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
2053 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
2054 } \
2055 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2056 } \
2057}
2058#else
2059# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2060#endif
2061
2062#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2063TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2064static a_SubTestType const a_aSubTests[] = \
2065{ \
2066 ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2067 ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2068 ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2069 ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2070 ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2071 ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2072 ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2073 ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2074 ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2075 ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2076 ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2077 ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2078 ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2079 ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2080}; \
2081\
2082GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2083\
2084static void ShiftU ## a_cBits ## Test(void) \
2085{ \
2086 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2087 { \
2088 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2089 PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2090 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2091 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2092 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2093 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2094 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2095 { \
2096 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2097 { \
2098 uint32_t fEfl = paTests[iTest].fEflIn; \
2099 a_Type uDst = paTests[iTest].uDstIn; \
2100 pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2101 if ( uDst != paTests[iTest].uDstOut \
2102 || fEfl != paTests[iTest].fEflOut ) \
2103 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2104 iTest, iVar == 0 ? "" : "/n", \
2105 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2106 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2107 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2108 else \
2109 { \
2110 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2111 *g_pfEfl = paTests[iTest].fEflIn; \
2112 pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2113 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2114 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2115 } \
2116 } \
2117 pfn = a_aSubTests[iFn].pfnNative; \
2118 } \
2119 } \
2120}
2121TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2122TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2123TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2124TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2125
2126#ifdef TSTIEMAIMPL_WITH_GENERATOR
2127static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
2128{
2129 ShiftU8Generate(pOut, cTests);
2130 ShiftU16Generate(pOut, cTests);
2131 ShiftU32Generate(pOut, cTests);
2132 ShiftU64Generate(pOut, cTests);
2133}
2134#endif
2135
2136static void ShiftTest(void)
2137{
2138 ShiftU8Test();
2139 ShiftU16Test();
2140 ShiftU32Test();
2141 ShiftU64Test();
2142}
2143
2144
2145/*
2146 * Multiplication and division.
2147 *
 * Note! The 8-bit functions have a different format, so we need to duplicate things.
2149 * Note! Currently ignoring undefined bits.
2150 */
2151
2152/* U8 */
2153TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2154static INT_MULDIV_U8_T const g_aMulDivU8[] =
2155{
2156 ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2157 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2158 ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2159 ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2160 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2161 ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2162 ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2163 ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2164 ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2165 ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2166};
2167
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the 8-bit mul/imul/div/idiv workers.
 *
 * Only subtests whose EFLAGS flavour matches the host CPU (or is flavour
 * independent) are generated, since the reference results come from running
 * the native worker.  Note that the destination is 16 bits wide (AX for the
 * 8-bit multiply/divide forms).
 *
 * @param   pOut    Stream the test data arrays are written to.
 * @param   cTests  Number of test records to generate per subtest.
 */
static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
    {
        if (   g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;
        /* Fixed: dropped a stray line-continuation backslash here (copy-paste
           residue from the macro-generated variants). */
        GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++ )
        {
            MULDIVU8_TEST_T Test;
            Test.fEflIn = RandEFlags();
            Test.fEflOut = Test.fEflIn;
            Test.uDstIn = RandU16Dst(iTest);
            Test.uDstOut = Test.uDstIn;
            Test.uSrcIn = RandU8Src(iTest);
            Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
            RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
        }
        GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
    }
}
#endif
2193
2194static void MulDivU8Test(void)
2195{
2196 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2197 {
2198 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
2199 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2200 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2201 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2202 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2203 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
2204 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2205 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2206 {
2207 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2208 {
2209 uint32_t fEfl = paTests[iTest].fEflIn;
2210 uint16_t uDst = paTests[iTest].uDstIn;
2211 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2212 if ( uDst != paTests[iTest].uDstOut
2213 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2214 || rc != paTests[iTest].rc)
2215 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2216 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2217 "%sexpected %#08x %#06RX16 %d%s\n",
2218 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2219 iVar ? " " : "", fEfl, uDst, rc,
2220 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2221 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2222 else
2223 {
2224 *g_pu16 = paTests[iTest].uDstIn;
2225 *g_pfEfl = paTests[iTest].fEflIn;
2226 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2227 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2228 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2229 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2230 }
2231 }
2232 pfn = g_aMulDivU8[iFn].pfnNative;
2233 }
2234 }
2235}
2236
2237#ifdef TSTIEMAIMPL_WITH_GENERATOR
2238# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2239void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2240{ \
2241 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2242 { \
2243 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2244 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2245 continue; \
2246 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2247 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2248 { \
2249 a_TestType Test; \
2250 Test.fEflIn = RandEFlags(); \
2251 Test.fEflOut = Test.fEflIn; \
2252 Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
2253 Test.uDst1Out = Test.uDst1In; \
2254 Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
2255 Test.uDst2Out = Test.uDst2In; \
2256 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
2257 Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
2258 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
2259 Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
2260 Test.rc, iTest); \
2261 } \
2262 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2263 } \
2264}
2265#else
2266# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2267#endif
2268
2269#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2270TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2271static a_SubTestType const a_aSubTests [] = \
2272{ \
2273 ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2274 ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2275 ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2276 ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2277 ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2278 ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2279 ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2280 ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2281}; \
2282\
2283GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2284\
2285static void MulDivU ## a_cBits ## Test(void) \
2286{ \
2287 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2288 { \
2289 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2290 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2291 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2292 uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
2293 PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2294 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2295 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2296 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2297 { \
2298 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2299 { \
2300 uint32_t fEfl = paTests[iTest].fEflIn; \
2301 a_Type uDst1 = paTests[iTest].uDst1In; \
2302 a_Type uDst2 = paTests[iTest].uDst2In; \
2303 int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
2304 if ( uDst1 != paTests[iTest].uDst1Out \
2305 || uDst2 != paTests[iTest].uDst2Out \
2306 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
2307 || rc != paTests[iTest].rc) \
2308 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
2309 " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
2310 "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
2311 iTest, iVar == 0 ? "" : "/n", \
2312 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2313 fEfl, uDst1, uDst2, rc, \
2314 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2315 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
2316 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2317 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
2318 else \
2319 { \
2320 *g_pu ## a_cBits = paTests[iTest].uDst1In; \
2321 *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2322 *g_pfEfl = paTests[iTest].fEflIn; \
2323 rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2324 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
2325 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2326 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
2327 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
2328 } \
2329 } \
2330 pfn = a_aSubTests[iFn].pfnNative; \
2331 } \
2332 } \
2333}
2334TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2335TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2336TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2337
2338#ifdef TSTIEMAIMPL_WITH_GENERATOR
2339static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
2340{
2341 MulDivU8Generate(pOut, cTests);
2342 MulDivU16Generate(pOut, cTests);
2343 MulDivU32Generate(pOut, cTests);
2344 MulDivU64Generate(pOut, cTests);
2345}
2346#endif
2347
2348static void MulDivTest(void)
2349{
2350 MulDivU8Test();
2351 MulDivU16Test();
2352 MulDivU32Test();
2353 MulDivU64Test();
2354}
2355
2356
2357/*
2358 * BSWAP
2359 */
2360static void BswapTest(void)
2361{
2362 if (SubTestAndCheckIfEnabled("bswap_u16"))
2363 {
2364 *g_pu32 = UINT32_C(0x12345678);
2365 iemAImpl_bswap_u16(g_pu32);
2366#if 0
2367 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2368#else
2369 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2370#endif
2371 *g_pu32 = UINT32_C(0xffff1122);
2372 iemAImpl_bswap_u16(g_pu32);
2373#if 0
2374 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2375#else
2376 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2377#endif
2378 }
2379
2380 if (SubTestAndCheckIfEnabled("bswap_u32"))
2381 {
2382 *g_pu32 = UINT32_C(0x12345678);
2383 iemAImpl_bswap_u32(g_pu32);
2384 RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
2385 }
2386
2387 if (SubTestAndCheckIfEnabled("bswap_u64"))
2388 {
2389 *g_pu64 = UINT64_C(0x0123456789abcdef);
2390 iemAImpl_bswap_u64(g_pu64);
2391 RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
2392 }
2393}
2394
2395
2396
2397/*********************************************************************************************************************************
2398* Floating point (x87 style) *
2399*********************************************************************************************************************************/
2400
2401/*
2402 * FPU constant loading.
2403 */
2404TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
2405
2406static const FPU_LD_CONST_T g_aFpuLdConst[] =
2407{
2408 ENTRY(fld1),
2409 ENTRY(fldl2t),
2410 ENTRY(fldl2e),
2411 ENTRY(fldpi),
2412 ENTRY(fldlg2),
2413 ENTRY(fldln2),
2414 ENTRY(fldz),
2415};
2416
2417#ifdef TSTIEMAIMPL_WITH_GENERATOR
2418static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
2419{
2420 X86FXSTATE State;
2421 RT_ZERO(State);
2422 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2423 {
2424 GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
2425 for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
2426 {
2427 State.FCW = RandFcw();
2428 State.FSW = RandFsw();
2429
2430 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
2431 {
2432 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2433 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
2434 g_aFpuLdConst[iFn].pfn(&State, &Res);
2435 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
2436 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
2437 }
2438 }
2439 GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
2440 }
2441}
2442#endif
2443
2444static void FpuLoadConstTest(void)
2445{
2446 /*
2447 * Inputs:
2448 * - FSW: C0, C1, C2, C3
2449 * - FCW: Exception masks, Precision control, Rounding control.
2450 *
2451 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2452 */
2453 X86FXSTATE State;
2454 RT_ZERO(State);
2455 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2456 {
2457 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2458 continue;
2459
2460 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2461 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2462 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2463 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2464 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2465 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2466 {
2467 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2468 {
2469 State.FCW = paTests[iTest].fFcw;
2470 State.FSW = paTests[iTest].fFswIn;
2471 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2472 pfn(&State, &Res);
2473 if ( Res.FSW != paTests[iTest].fFswOut
2474 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2475 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2476 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2477 Res.FSW, FormatR80(&Res.r80Result),
2478 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2479 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2480 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2481 FormatFcw(paTests[iTest].fFcw) );
2482 }
2483 pfn = g_aFpuLdConst[iFn].pfnNative;
2484 }
2485 }
2486}
2487
2488
2489/*
2490 * Load floating point values from memory.
2491 */
2492#ifdef TSTIEMAIMPL_WITH_GENERATOR
2493# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2494static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2495{ \
2496 X86FXSTATE State; \
2497 RT_ZERO(State); \
2498 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2499 { \
2500 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2501 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2502 { \
2503 State.FCW = RandFcw(); \
2504 State.FSW = RandFsw(); \
2505 a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
2506 \
2507 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2508 { \
2509 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2510 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
2511 a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
2512 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
2513 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
2514 GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
2515 } \
2516 } \
2517 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2518 } \
2519}
2520#else
2521# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
2522#endif
2523
2524#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
2525typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
2526typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
2527TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
2528\
2529static const a_SubTestType a_aSubTests[] = \
2530{ \
2531 ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
2532}; \
2533GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2534\
2535static void FpuLdR ## a_cBits ## Test(void) \
2536{ \
2537 X86FXSTATE State; \
2538 RT_ZERO(State); \
2539 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2540 { \
2541 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2542 \
2543 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2544 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2545 PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2546 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2547 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2548 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2549 { \
2550 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2551 { \
2552 a_rdTypeIn const InVal = paTests[iTest].InVal; \
2553 State.FCW = paTests[iTest].fFcw; \
2554 State.FSW = paTests[iTest].fFswIn; \
2555 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2556 pfn(&State, &Res, &InVal); \
2557 if ( Res.FSW != paTests[iTest].fFswOut \
2558 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2559 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2560 "%s -> fsw=%#06x %s\n" \
2561 "%s expected %#06x %s%s%s (%s)\n", \
2562 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2563 FormatR ## a_cBits(&paTests[iTest].InVal), \
2564 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2565 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2566 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2567 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2568 FormatFcw(paTests[iTest].fFcw) ); \
2569 } \
2570 pfn = a_aSubTests[iFn].pfnNative; \
2571 } \
2572 } \
2573}
2574
2575TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
2576TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
2577TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2578
2579#ifdef TSTIEMAIMPL_WITH_GENERATOR
2580static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
2581{
2582 FpuLdR80Generate(pOut, cTests);
2583 FpuLdR64Generate(pOut, cTests);
2584 FpuLdR32Generate(pOut, cTests);
2585}
2586#endif
2587
2588static void FpuLdMemTest(void)
2589{
2590 FpuLdR80Test();
2591 FpuLdR64Test();
2592 FpuLdR32Test();
2593}
2594
2595
2596/*
2597 * Load integer values from memory.
2598 */
2599#ifdef TSTIEMAIMPL_WITH_GENERATOR
2600# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
2601static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2602{ \
2603 X86FXSTATE State; \
2604 RT_ZERO(State); \
2605 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2606 { \
2607 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2608 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2609 { \
2610 State.FCW = RandFcw(); \
2611 State.FSW = RandFsw(); \
2612 a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
2613 \
2614 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2615 { \
2616 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2617 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
2618 a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
2619 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
2620 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), InVal, iTest, iRounding); \
2621 } \
2622 } \
2623 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2624 } \
2625}
2626#else
2627# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
2628#endif
2629
/**
 * Instantiates one FPU load-integer (fild) subtest for a given source width.
 *
 * Expands to:
 *  - the worker function pointer typedefs (FNIEMAIMPLFPULDR80FROMI<N>),
 *  - the subtest descriptor type and table (single fild_r80_from_i<N> entry),
 *  - the test-vector generator via GEN_FPU_LOAD_INT (empty stub unless
 *    TSTIEMAIMPL_WITH_GENERATOR is defined),
 *  - FpuLdI<N>Test(): replays the pre-generated vectors, checking the output
 *    FSW and the converted 80-bit result against recorded expectations, then
 *    repeats with the native worker variant when one is present (cVars).
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
\
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2679
/* Instantiate the fild subtests for 64-, 32- and 16-bit integer sources. */
TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2683
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the test vectors for all three fild source widths (widest first,
 *  matching the order used by FpuLdIntTest below). */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdI64Generate(pOut, cTests);
    FpuLdI32Generate(pOut, cTests);
    FpuLdI16Generate(pOut, cTests);
}
#endif
2692
/** Runs all three fild subtests (see TEST_FPU_LOAD_INT instantiations). */
static void FpuLdIntTest(void)
{
    FpuLdI64Test();
    FpuLdI32Test();
    FpuLdI16Test();
}
2699
2700
2701/*
2702 * Load binary coded decimal values from memory.
2703 */
2704typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
2705typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
2706TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
2707
2708static const FPU_LD_D80_T g_aFpuLdD80[] =
2709{
2710 ENTRY(fld_r80_from_d80)
2711};
2712
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits fld_r80_from_d80 test vectors: a random FCW/FSW pair plus a random
 * packed-BCD input, each exercised under all four x87 rounding modes.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            /* Pick the control/status word and input once; only the RC field
               varies below, so keeping the base FCW in a const is equivalent
               to the usual re-mask-in-place pattern. */
            uint16_t const fFcwBase = RandFcw();
            State.FSW               = RandFsw();
            RTPBCD80U      InVal    = RandD80Src(iTest);

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (fFcwBase & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
                             iTest, iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
    }
}
#endif
2741
2742static void FpuLdD80Test(void)
2743{
2744 X86FXSTATE State;
2745 RT_ZERO(State);
2746 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2747 {
2748 if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
2749 continue;
2750
2751 uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
2752 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
2753 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
2754 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
2755 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2756 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2757 {
2758 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2759 {
2760 RTPBCD80U const InVal = paTests[iTest].InVal;
2761 State.FCW = paTests[iTest].fFcw;
2762 State.FSW = paTests[iTest].fFswIn;
2763 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2764 pfn(&State, &Res, &InVal);
2765 if ( Res.FSW != paTests[iTest].fFswOut
2766 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2767 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
2768 "%s -> fsw=%#06x %s\n"
2769 "%s expected %#06x %s%s%s (%s)\n",
2770 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2771 FormatD80(&paTests[iTest].InVal),
2772 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
2773 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2774 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2775 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2776 FormatFcw(paTests[iTest].fFcw) );
2777 }
2778 pfn = g_aFpuLdD80[iFn].pfnNative;
2779 }
2780 }
2781}
2782
2783
2784/*
2785 * Store values floating point values to memory.
2786 */
2787#ifdef TSTIEMAIMPL_WITH_GENERATOR
2788static const RTFLOAT80U g_aFpuStR32Specials[] =
2789{
2790 RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2791 RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2792 RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2793 RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2794};
2795static const RTFLOAT80U g_aFpuStR64Specials[] =
2796{
2797 RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2798 RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2799 RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2800 RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2801 RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
2802};
2803static const RTFLOAT80U g_aFpuStR80Specials[] =
2804{
2805 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
2806};
/**
 * Generator for the FPU store subtests (TSTIEMAIMPL_WITH_GENERATOR builds):
 * runs random plus special 80-bit inputs through the store worker for every
 * rounding mode and a sweep of the OM/UM/PM exception-mask bit combinations,
 * writing each FCW/FSW-in/FSW-out/input/output tuple as a test table entry.
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
                                   : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
\
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    RT_ZERO(OutVal); \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); /* poison so missing writes are visible */ \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
#endif
2850
/**
 * Instantiates one FPU store (fst) subtest for a given destination width.
 *
 * Expands to the worker typedefs, the subtest descriptor table
 * (fst_r80_to_r<N>), the generator via GEN_FPU_STORE, and the
 * FpuStR<N>Test() driver that replays the pre-generated vectors, comparing
 * both the resulting FSW and the stored value; the native worker variant is
 * re-run in a second variation pass when present.
 */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                   PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
\
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_rdType OutVal; \
                RT_ZERO(OutVal); \
                memset(&OutVal, 0xfe, sizeof(OutVal)); /* poison so missing writes are visible */ \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2905
/* Instantiate the fst subtests for 80-, 64- and 32-bit destinations. */
TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2909
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the test vectors for all three fst destination widths. */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuStR80Generate(pOut, cTests);
    FpuStR64Generate(pOut, cTests);
    FpuStR32Generate(pOut, cTests);
}
#endif
2918
/** Runs all three fst subtests (see TEST_FPU_STORE instantiations). */
static void FpuStMemTest(void)
{
    FpuStR80Test();
    FpuStR64Test();
    FpuStR32Test();
}
2925
2926
2927/*
2928 * Store integer values to memory or register.
2929 */
2930TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
2931TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
2932TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
2933
2934static const FPU_ST_I16_T g_aFpuStI16[] =
2935{
2936 ENTRY(fist_r80_to_i16),
2937 ENTRY_AMD( fistt_r80_to_i16, 0),
2938 ENTRY_INTEL(fistt_r80_to_i16, 0),
2939};
2940static const FPU_ST_I32_T g_aFpuStI32[] =
2941{
2942 ENTRY(fist_r80_to_i32),
2943 ENTRY(fistt_r80_to_i32),
2944};
2945static const FPU_ST_I64_T g_aFpuStI64[] =
2946{
2947 ENTRY(fist_r80_to_i64),
2948 ENTRY(fistt_r80_to_i64),
2949};
2950
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Special inputs exercising the int16 overflow/rounding edges around
 *  exponents 13..32; the 16-bit variant borrows properties from the 32-bit
 *  one, thus all this stuff. */
static const RTFLOAT80U g_aFpuStI16Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
/** Special inputs around the int32 boundaries (exponents 30/31). */
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
/** Special inputs around the int64 boundaries (exponents 61..63). */
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};
3041
/**
 * Generator for the FPU store-integer subtests (TSTIEMAIMPL_WITH_GENERATOR).
 * Workers with a vendor-specific EFL flavour go to the per-CPU output stream
 * (pOutCpu) and are skipped when they don't match the host flavour; random
 * plus special inputs are swept across rounding modes and OM/UM/PM mask
 * combinations.
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
\
        GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
\
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType iOutVal = ~(a_iType)2; /* poison so missing writes are visible */ \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
#endif
3093
/**
 * Instantiates one FPU store-integer (fist/fisttp) subtest driver.
 *
 * Unlike TEST_FPU_STORE, the typedefs and subtest tables are declared
 * separately above (the i16 table needs vendor-specific entries); this only
 * pulls in the generator and defines FpuStI<N>Test(), which replays the
 * vectors comparing FSW and the stored integer, then re-runs with the native
 * worker variant when present.
 */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
\
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_iType iOutVal = ~(a_iType)2; /* poison so missing writes are visible */ \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x " a_szFmt "\n" \
                                          "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, iOutVal, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3136
/* Note: fistt_r80_to_i16 results differ on AMD, of course :-) — hence the
   vendor-specific entries in g_aFpuStI16 above. */

TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3142
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the test vectors for all three fist destination widths; vendor
 *  flavoured workers go to pOutCpu (see GEN_FPU_STORE_INT). */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    FpuStI64Generate(pOut, pOutCpu, cTests);
    FpuStI32Generate(pOut, pOutCpu, cTests);
    FpuStI16Generate(pOut, pOutCpu, cTests);
}
#endif
3151
/** Runs all three fist subtests (see TEST_FPU_STORE_INT instantiations). */
static void FpuStIntTest(void)
{
    FpuStI64Test();
    FpuStI32Test();
    FpuStI16Test();
}
3158
3159
3160/*
3161 * Store as packed BCD value (memory).
3162 */
3163typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
3164typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3165TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3166
3167static const FPU_ST_D80_T g_aFpuStD80[] =
3168{
3169 ENTRY(fst_r80_to_d80),
3170};
3171
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits fst_r80_to_d80 test vectors: random inputs (limited to 59 bits of
 * magnitude, the BCD range) plus boundary specials around the 18-digit
 * max/min, swept over all rounding modes and OM/UM/PM mask combinations.
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    /* Values straddling the largest/smallest representable 18-digit packed
       BCD magnitude (999'999'999'999'999'999 = 0xde0b6b3a763ffff0 * 2^-4). */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t  uFswOut = 0;
                    RTPBCD80U OutVal  = RTPBCD80U_INIT_ZERO(0);
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
                                 GenFormatD80(&OutVal), iTest, iRounding, iMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
    }
}
#endif
3225
3226
3227static void FpuStD80Test(void)
3228{
3229 X86FXSTATE State;
3230 RT_ZERO(State);
3231 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3232 {
3233 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3234 continue;
3235
3236 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3237 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3238 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3239 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3240 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3241 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3242 {
3243 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3244 {
3245 RTFLOAT80U const InVal = paTests[iTest].InVal;
3246 uint16_t uFswOut = 0;
3247 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3248 State.FCW = paTests[iTest].fFcw;
3249 State.FSW = paTests[iTest].fFswIn;
3250 pfn(&State, &uFswOut, &OutVal, &InVal);
3251 if ( uFswOut != paTests[iTest].fFswOut
3252 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3253 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3254 "%s -> fsw=%#06x %s\n"
3255 "%s expected %#06x %s%s%s (%s)\n",
3256 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3257 FormatR80(&paTests[iTest].InVal),
3258 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3259 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3260 FswDiff(uFswOut, paTests[iTest].fFswOut),
3261 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3262 FormatFcw(paTests[iTest].fFcw) );
3263 }
3264 pfn = g_aFpuStD80[iFn].pfnNative;
3265 }
3266 }
3267}
3268
3269
3270
3271/*********************************************************************************************************************************
3272* x87 FPU Binary Operations *
3273*********************************************************************************************************************************/
3274
3275/*
3276 * Binary FPU operations on two 80-bit floating point values.
3277 */
3278TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
3279enum { kFpuBinaryHint_fprem = 1, };
3280
3281static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
3282{
3283 ENTRY(fadd_r80_by_r80),
3284 ENTRY(fsub_r80_by_r80),
3285 ENTRY(fsubr_r80_by_r80),
3286 ENTRY(fmul_r80_by_r80),
3287 ENTRY(fdiv_r80_by_r80),
3288 ENTRY(fdivr_r80_by_r80),
3289 ENTRY_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
3290 ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
3291 ENTRY(fscale_r80_by_r80),
3292 ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3293 ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3294 ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3295 ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3296 ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3297 ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3298};
3299
3300#ifdef TSTIEMAIMPL_WITH_GENERATOR
3301static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3302{
3303 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
3304
3305 static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
3306 {
3307 { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
3308 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3309 { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
3310 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3311 { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
3312 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3313 { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
3314 RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
3315 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
3316 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
3317 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3318 RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3319 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3320 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3321 /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
3322 once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
3323 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
3324 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3325 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
3326 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
3327 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
3328 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
3329 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
3330 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
3331 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
3332 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
3333 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3334 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3335 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3336 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3337 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
3338 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
3339 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
3340 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
3341 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
3342 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
3343 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
3344 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
3345 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3346 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3347 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3348 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3349 /* fscale: Negative variants for the essentials of the above. */
3350 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3351 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3352 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3353 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3354 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3355 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
3356 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3357 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
3358 /* fscale: Some fun with denormals and pseudo-denormals. */
3359 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
3360 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3361 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
3362 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3363 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
3364 { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
3365 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3366 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3367 { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3368 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
3369 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3370 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
3371 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3372 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
3373 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
3374 };
3375
3376 X86FXSTATE State;
3377 RT_ZERO(State);
3378 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3379 uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
3380 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3381 {
3382 PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
3383 PRTSTREAM pOutFn = pOut;
3384 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3385 {
3386 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3387 continue;
3388 pOutFn = pOutCpu;
3389 }
3390
3391 GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
3392 uint32_t iTestOutput = 0;
3393 uint32_t cNormalInputPairs = 0;
3394 uint32_t cTargetRangeInputs = 0;
3395 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3396 {
3397 RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
3398 RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
3399 bool fTargetRange = false;
3400 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3401 {
3402 cNormalInputPairs++;
3403 if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
3404 && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
3405 cTargetRangeInputs += fTargetRange = true;
3406 else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
3407 if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3408 { /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
3409 InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
3410 InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
3411 cTargetRangeInputs += fTargetRange = true;
3412 }
3413 }
3414 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3415 {
3416 iTest -= 1;
3417 continue;
3418 }
3419
3420 uint16_t const fFcwExtra = 0;
3421 uint16_t const fFcw = RandFcw();
3422 State.FSW = RandFsw();
3423
3424 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3425 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3426 {
3427 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3428 | (iRounding << X86_FCW_RC_SHIFT)
3429 | (iPrecision << X86_FCW_PC_SHIFT)
3430 | X86_FCW_MASK_ALL;
3431 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3432 pfn(&State, &ResM, &InVal1, &InVal2);
3433 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3434 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3435 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3436
3437 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3438 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3439 pfn(&State, &ResU, &InVal1, &InVal2);
3440 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3441 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3442 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3443
3444 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3445 if (fXcpt)
3446 {
3447 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3448 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3449 pfn(&State, &Res1, &InVal1, &InVal2);
3450 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3451 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3452 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3453 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3454 {
3455 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
3456 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3457 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3458 pfn(&State, &Res2, &InVal1, &InVal2);
3459 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
3460 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3461 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3462 }
3463 if (!RT_IS_POWER_OF_TWO(fXcpt))
3464 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
3465 if (fUnmasked & fXcpt)
3466 {
3467 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
3468 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3469 pfn(&State, &Res3, &InVal1, &InVal2);
3470 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
3471 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3472 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
3473 }
3474 }
3475
3476 /* If the values are in range and caused no exceptions, do the whole series of
3477 partial reminders till we get the non-partial one or run into an exception. */
3478 if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3479 {
3480 IEMFPURESULT ResPrev = ResM;
3481 for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
3482 {
3483 State.FCW = State.FCW | X86_FCW_MASK_ALL;
3484 State.FSW = ResPrev.FSW;
3485 IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3486 pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
3487 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
3488 State.FCW | fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
3489 GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
3490 iTest, iRounding, iPrecision, i + 1, iTestOutput++);
3491 ResPrev = ResSeq;
3492 }
3493 }
3494 }
3495 }
3496 GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
3497 }
3498}
3499#endif
3500
3501
3502static void FpuBinaryR80Test(void)
3503{
3504 X86FXSTATE State;
3505 RT_ZERO(State);
3506 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3507 {
3508 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
3509 continue;
3510
3511 uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3512 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3513 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3514 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3515 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3516 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3517 {
3518 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3519 {
3520 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3521 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3522 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3523 State.FCW = paTests[iTest].fFcw;
3524 State.FSW = paTests[iTest].fFswIn;
3525 pfn(&State, &Res, &InVal1, &InVal2);
3526 if ( Res.FSW != paTests[iTest].fFswOut
3527 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3528 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3529 "%s -> fsw=%#06x %s\n"
3530 "%s expected %#06x %s%s%s (%s)\n",
3531 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3532 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3533 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3534 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3535 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3536 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3537 FormatFcw(paTests[iTest].fFcw) );
3538 }
3539 pfn = g_aFpuBinaryR80[iFn].pfnNative;
3540 }
3541 }
3542}
3543
3544
3545/*
3546 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
3547 */
3548#define int64_t_IS_NORMAL(a) 1
3549#define int32_t_IS_NORMAL(a) 1
3550#define int16_t_IS_NORMAL(a) 1
3551
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked input pairs the generator appends after the cTests random
   entries (one table per second-operand type). */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};

/* Emits the FpuBinary<UpBits>Generate() function that produces test data for
   the r80-by-<type> binary operations.  For each input pair every rounding
   mode, precision and exception-mask (all-masked / all-unmasked) combination
   is recorded.  A quarter of the random inputs are forced to be normal pairs
   by redoing the tail iterations until the quota is met. */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                /* Not enough normal pairs yet - redo this iteration with fresh random input. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
                    } \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3626
/* Emits, for one second-operand type: the sub-test table (the six
   f[i]add/mul/sub/subr/div/divr r80-by-<type> workers), the generator (see
   GEN_FPU_BINARY_SMALL) and the FpuBinary<UpBits>Test() function which
   replays the recorded test data, checking output FSW and 80-bit result. */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        /* Second variation (when present) runs the native worker on the same data. */ \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3687
3688
3689/*
3690 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
3691 */
3692#ifdef TSTIEMAIMPL_WITH_GENERATOR
3693static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
3694{
3695 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3696 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
3697};
3698static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
3699{
3700 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3701 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
3702};
3703static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
3704{
3705 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3706 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
3707};
3708static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
3709{
3710 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
3711};
3712static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
3713{
3714 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
3715};
3716
3717# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
3718static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
3719{ \
3720 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
3721 \
3722 X86FXSTATE State; \
3723 RT_ZERO(State); \
3724 uint32_t cMinNormalPairs = (cTests - 144) / 4; \
3725 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3726 { \
3727 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
3728 uint32_t cNormalInputPairs = 0; \
3729 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
3730 { \
3731 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
3732 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
3733 a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
3734 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
3735 if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
3736 cNormalInputPairs++; \
3737 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
3738 { \
3739 iTest -= 1; \
3740 continue; \
3741 } \
3742 \
3743 uint16_t const fFcw = RandFcw(); \
3744 State.FSW = RandFsw(); \
3745 \
3746 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
3747 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
3748 { \
3749 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
3750 uint16_t fFswOut = 0; \
3751 a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
3752 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
3753 State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
3754 iTest, iMask ? 'c' : 'u'); \
3755 } \
3756 } \
3757 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
3758 } \
3759}
3760#else
3761# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3762#endif
3763
/* Emits, for one second-operand type: the sub-test table (entries given as
   variadic arguments), the generator (see GEN_FPU_BINARY_FSW) and the
   FpuBinaryFsw<UpBits>Test() function replaying the recorded data; only the
   output FSW is checked as these operations produce no value result. */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        /* Second variation (when present) runs the native worker on the same data. */ \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t fFswOut = 0; \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", fFswOut, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3817
3818
3819/*
3820 * Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
3821 */
3822TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);
3823
3824static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
3825{
3826 ENTRY(fcomi_r80_by_r80),
3827 ENTRY(fucomi_r80_by_r80),
3828};
3829
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked input pairs appended after the random entries. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};

/**
 * Generates test data for the EFLAGS returning r80-by-r80 comparisons in
 * g_aFpuBinaryEflR80 (fcomi/fucomi), recording input FCW/FSW, both operands,
 * the output FSW and the returned EFLAGS for each input pair with the
 * exception mask both cleared ('u') and fully set ('c').
 */
static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4; /* quota of normal input pairs among the random entries */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
        {
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Not enough normal pairs yet - redo this iteration with fresh random input. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
            {
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
                uint16_t uFswOut = 0;
                uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
                             State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
                             iTest, iMask ? 'c' : 'u');
            }
        }
        GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
    }
}
#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3878
3879static void FpuBinaryEflR80Test(void)
3880{
3881 X86FXSTATE State;
3882 RT_ZERO(State);
3883 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3884 {
3885 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3886 continue;
3887
3888 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3889 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3890 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3891 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3892 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3893 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3894 {
3895 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3896 {
3897 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3898 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3899 State.FCW = paTests[iTest].fFcw;
3900 State.FSW = paTests[iTest].fFswIn;
3901 uint16_t uFswOut = 0;
3902 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3903 if ( uFswOut != paTests[iTest].fFswOut
3904 || fEflOut != paTests[iTest].fEflOut)
3905 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3906 "%s -> fsw=%#06x efl=%#08x\n"
3907 "%s expected %#06x %#08x %s%s (%s)\n",
3908 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3909 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3910 iVar ? " " : "", uFswOut, fEflOut,
3911 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3912 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
3913 FormatFcw(paTests[iTest].fFcw));
3914 }
3915 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3916 }
3917 }
3918}
3919
3920
3921/*********************************************************************************************************************************
3922* x87 FPU Unary Operations *
3923*********************************************************************************************************************************/
3924
3925/*
3926 * Unary FPU operations on one 80-bit floating point value.
3927 *
3928 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3929 * a rounding error or not.
3930 */
3931TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);
3932
3933enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
3934static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
3935{
3936 ENTRY_EX( fabs_r80, kUnary_Accurate),
3937 ENTRY_EX( fchs_r80, kUnary_Accurate),
3938 ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
3939 ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
3940 ENTRY_EX( fsqrt_r80, kUnary_Accurate),
3941 ENTRY_EX( frndint_r80, kUnary_Accurate),
3942 ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
3943 ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
3944 ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
3945 ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
3946};
3947
3948#ifdef TSTIEMAIMPL_WITH_GENERATOR
3949
3950static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3951{
3952 if ( enmKind == kUnary_Rounding_F2xm1
3953 && RTFLOAT80U_IS_NORMAL(pr80Val)
3954 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3955 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
3956 return true;
3957 return false;
3958}
3959
/**
 * Generates test data for the unary FPU operations in g_aFpuUnaryR80.
 *
 * Each input is exercised with all 4 rounding modes x all 4 precision modes,
 * first with all exceptions masked, then all unmasked, and finally with just
 * the exceptions that actually occurred masked (plus variations).
 *
 * @param   pOut    Output stream for workers with common behaviour.
 * @param   pOutCpu Output stream for workers with CPU-vendor specific behaviour.
 * @param   cTests  Number of random inputs per worker (specials come on top).
 */
static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    /* Hand-picked inputs appended after the random ones. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
    };
    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* Require at least 1/4 of the inputs to be normal values. */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        /* Prefer the native worker when producing reference data. */
        PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* Vendor specific worker: only generate data when the host CPU matches. */
            if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t iTestOutput = 0;
        uint32_t cNormalInputs = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
            {
                if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
                {
                    /* Steer a share of the normal inputs into the range where the operation
                       produces interesting (non-saturated) results.
                       NOTE(review): given the enclosing condition the second operand of both
                       ternaries below is dead code; possibly the outer check was meant to
                       include the trigonometry workers as well - verify intent. */
                    unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
                                        ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
                    unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
                    if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
                        cTargetRangeInputs++;
                    else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
                    {
                        /* Force the exponent into the target range towards the end of the run. */
                        InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
                        cTargetRangeInputs++;
                    }
                }
                cNormalInputs++;
            }
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Not enough normal inputs yet; redo this iteration with a new random value. */
                iTest -= 1;
                continue;
            }

            /* Bit 7 (MBZ in FCW) flags the test record as "rounding error acceptable". */
            uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* 1st call: all exceptions masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal);
                    RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* 2nd call: all exceptions unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal);
                    RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Follow-up calls: mask exactly the exceptions that were raised. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal);
                        RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
                                     State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
                                     GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            /* Partial masking raised additional exceptions; retry with those included too. */
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal);
                            RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
                                         State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
                                         GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        }
                        /* When several exceptions occurred, also test each one unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal);
                                    RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
                                                 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
                                                 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
                                }
                    }
                }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
    }
}
4079#endif
4080
4081static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4082{
4083 if (fFcw1 == fFcw2)
4084 return true;
4085 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4086 {
4087 *pfRndErr = true;
4088 return true;
4089 }
4090 return false;
4091}
4092
4093static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4094{
4095 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4096 return true;
4097 if ( fRndErrOk
4098 && pr80Val1->s.fSign == pr80Val2->s.fSign)
4099 {
4100 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4101 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4102 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4103 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4104 ||
4105 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4106 && pr80Val1->s.uMantissa == UINT64_MAX
4107 && pr80Val2->s.uMantissa == RT_BIT_64(63))
4108 ||
4109 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4110 && pr80Val2->s.uMantissa == UINT64_MAX
4111 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4112 {
4113 *pfRndErr = true;
4114 return true;
4115 }
4116 }
4117 return false;
4118}
4119
4120
/**
 * Runs the pre-generated tests for the unary FPU operations in g_aFpuUnaryR80,
 * comparing each worker's FSW and 80-bit result against the recorded values.
 * One-ulp/C1 differences are tolerated for records flagged with bit 7 in the
 * stored FCW (see FpuUnaryR80Generate) and are counted and reported.
 */
static void FpuUnaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]); /* 2 when a native variant exists, else 1. */
        uint32_t cRndErrs = 0;
        uint32_t cPossibleRndErrs = 0;
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* Bit 7 of the recorded FCW is a generator-side flag meaning "rounding
                   error acceptable"; it must be stripped before loading the FCW. */
                bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                bool fRndErr = false;
                if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
                    || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
                cRndErrs += fRndErr;
                cPossibleRndErrs += fRndErrOk;
            }
            pfn = g_aFpuUnaryR80[iFn].pfnNative; /* 2nd pass (if any) checks the native worker. */
        }
        if (cPossibleRndErrs > 0)
            RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
    }
}
4169
4170
4171/*
4172 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
4173 */
4174TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
4175
4176static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
4177{
4178 ENTRY(ftst_r80),
4179 ENTRY_EX(fxam_r80, 1),
4180};
4181
4182#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the FSW-only unary FPU operations (ftst, fxam).
 *
 * For non-fxam workers each input is exercised with all rounding/precision
 * combinations, both with all exceptions masked and all unmasked.  For fxam
 * the FCW is used as-is and the register is randomly marked empty via FTW
 * (recorded using the MBZ bit 7 of the stored FCW).
 *
 * @param   pOut    Output stream for workers with common behaviour.
 * @param   pOutCpu Output stream for workers with CPU-vendor specific behaviour.
 * @param   cTests  Number of random inputs per worker (specials come on top).
 */
static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    /* Hand-picked inputs appended after the random ones. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* Require at least 1/4 of the inputs to be normal values. */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1; /* see table: fxam needs FTW handling */
        PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* Vendor specific worker: only generate data when the host CPU matches. */
            if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }
        State.FTW = 0;

        GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Not enough normal inputs yet; redo this iteration with a new random value. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        /* One pass with all exceptions unmasked (iMask=0), one with all masked. */
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
                                         State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
                                         iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                        }
                    }
                }
            }
            else
            {
                /* fxam: exercise with the register randomly marked empty (1 in 4). */
                uint16_t fFswOut = 0;
                uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
                             fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
    }
}
4256#endif
4257
4258
/**
 * Runs the pre-generated tests for the FSW-only unary FPU operations,
 * comparing each worker's output FSW against the recorded value.
 * Bit 7 of the stored FCW indicates an empty register tag (fxam records).
 */
static void FpuUnaryFswR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]); /* 2 when a native variant exists, else 1. */
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                uint16_t fFswOut = 0;
                State.FSW = paTests[iTest].fFswIn;
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
                /* Bit 7 set means the generator ran with an empty register tag. */
                State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
                pfn(&State, &fFswOut, &InVal);
                if (fFswOut != paTests[iTest].fFswOut)
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x\n"
                                          "%s expected %#06x %s (%s%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? "  " : "", fFswOut,
                                 iVar ? "  " : "", paTests[iTest].fFswOut,
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
                                 paTests[iTest].fFcw & 0x80 ? " empty" : "");
            }
            pfn = g_aFpuUnaryFswR80[iFn].pfnNative; /* 2nd pass (if any) checks the native worker. */
        }
    }
}
4298
4299/*
4300 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4301 */
4302TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
4303
4304static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
4305{
4306 ENTRY(fxtract_r80_r80),
4307 ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
4308 ENTRY_INTEL(fptan_r80_r80, 0),
4309 ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
4310 ENTRY_INTEL(fsincos_r80_r80, 0),
4311};
4312
4313#ifdef TSTIEMAIMPL_WITH_GENERATOR
4314static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4315{
4316 static RTFLOAT80U const s_aSpecials[] =
4317 {
4318 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4319 };
4320
4321 X86FXSTATE State;
4322 RT_ZERO(State);
4323 uint32_t cMinNormals = cTests / 4;
4324 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4325 {
4326 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4327 PRTSTREAM pOutFn = pOut;
4328 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4329 {
4330 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4331 continue;
4332 pOutFn = pOutCpu;
4333 }
4334
4335 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4336 uint32_t iTestOutput = 0;
4337 uint32_t cNormalInputs = 0;
4338 uint32_t cTargetRangeInputs = 0;
4339 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4340 {
4341 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4342 if (RTFLOAT80U_IS_NORMAL(&InVal))
4343 {
4344 if (iFn != 0)
4345 {
4346 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4347 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4348 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4349 cTargetRangeInputs++;
4350 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4351 {
4352 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4353 cTargetRangeInputs++;
4354 }
4355 }
4356 cNormalInputs++;
4357 }
4358 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4359 {
4360 iTest -= 1;
4361 continue;
4362 }
4363
4364 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4365 uint16_t const fFcw = RandFcw();
4366 State.FSW = RandFsw();
4367
4368 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4369 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4370 {
4371 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4372 | (iRounding << X86_FCW_RC_SHIFT)
4373 | (iPrecision << X86_FCW_PC_SHIFT)
4374 | X86_FCW_MASK_ALL;
4375 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4376 pfn(&State, &ResM, &InVal);
4377 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4378 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4379 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4380
4381 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4382 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4383 pfn(&State, &ResU, &InVal);
4384 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4385 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4386 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4387
4388 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4389 if (fXcpt)
4390 {
4391 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4392 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4393 pfn(&State, &Res1, &InVal);
4394 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4395 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4396 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4397 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4398 {
4399 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4400 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4401 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4402 pfn(&State, &Res2, &InVal);
4403 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4404 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4405 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4406 }
4407 if (!RT_IS_POWER_OF_TWO(fXcpt))
4408 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4409 if (fUnmasked & fXcpt)
4410 {
4411 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4412 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4413 pfn(&State, &Res3, &InVal);
4414 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4415 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4416 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4417 }
4418 }
4419 }
4420 }
4421 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4422 }
4423}
4424#endif
4425
4426
/**
 * Runs the pre-generated tests for the two-result unary FPU operations,
 * comparing the worker's FSW and both 80-bit results against the recorded
 * values (exact match required, no rounding slack).
 */
static void FpuUnaryTwoR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
        FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]); /* 2 when a native variant exists, else 1. */
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                if ( Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s %s\n"
                                          "%s expected %#06x %s %s %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
                                 iVar ? "  " : "", paTests[iTest].fFswOut,
                                 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
            }
            pfn = g_aFpuUnaryTwoR80[iFn].pfnNative; /* 2nd pass (if any) checks the native worker. */
        }
    }
}
4469
4470
4471/*********************************************************************************************************************************
4472* SSE floating point Binary Operations *
4473*********************************************************************************************************************************/
4474
4475/*
4476 * Binary SSE operations on packed single precision floating point values.
4477 */
4478TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4479
4480static const SSE_BINARY_R32_T g_aSseBinaryR32[] =
4481{
4482 ENTRY_BIN(addps_u128),
4483 ENTRY_BIN(mulps_u128),
4484 ENTRY_BIN(subps_u128),
4485 ENTRY_BIN(minps_u128),
4486 ENTRY_BIN(divps_u128),
4487 ENTRY_BIN(maxps_u128),
4488 ENTRY_BIN(haddps_u128),
4489 ENTRY_BIN(hsubps_u128),
4490 ENTRY_BIN(sqrtps_u128),
4491 ENTRY_BIN(addsubps_u128),
4492};
4493
4494#ifdef TSTIEMAIMPL_WITH_GENERATOR
4495static RTEXITCODE SseBinaryR32Generate(const char *pszDataFileFmt, uint32_t cTests)
4496{
4497 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4498
4499 static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
4500 {
4501 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
4502 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
4503 /** @todo More specials. */
4504 };
4505
4506 X86FXSTATE State;
4507 RT_ZERO(State);
4508 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4509 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4510 {
4511 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
4512
4513 PRTSTREAM pStrmOut = NULL;
4514 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32[iFn].pszName);
4515 if (RT_FAILURE(rc))
4516 {
4517 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4518 return RTEXITCODE_FAILURE;
4519 }
4520
4521 uint32_t cNormalInputPairs = 0;
4522 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4523 {
4524 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
4525
4526 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4527 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
4528 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
4529 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
4530
4531 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4532 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
4533 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
4534 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
4535
4536 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
4537 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
4538 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
4539 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
4540 cNormalInputPairs++;
4541 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4542 {
4543 iTest -= 1;
4544 continue;
4545 }
4546
4547 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4548 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4549 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4550 for (uint8_t iFz = 0; iFz < 2; iFz++)
4551 {
4552 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4553 | (iRounding << X86_MXCSR_RC_SHIFT)
4554 | (iDaz ? X86_MXCSR_DAZ : 0)
4555 | (iFz ? X86_MXCSR_FZ : 0)
4556 | X86_MXCSR_XCPT_MASK;
4557 IEMSSERESULT ResM; RT_ZERO(ResM);
4558 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
4559 TestData.fMxcsrIn = State.MXCSR;
4560 TestData.fMxcsrOut = ResM.MXCSR;
4561 TestData.OutVal = ResM.uResult;
4562 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4563
4564 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4565 IEMSSERESULT ResU; RT_ZERO(ResU);
4566 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
4567 TestData.fMxcsrIn = State.MXCSR;
4568 TestData.fMxcsrOut = ResU.MXCSR;
4569 TestData.OutVal = ResU.uResult;
4570 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4571
4572 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4573 if (fXcpt)
4574 {
4575 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
4576 IEMSSERESULT Res1; RT_ZERO(Res1);
4577 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
4578 TestData.fMxcsrIn = State.MXCSR;
4579 TestData.fMxcsrOut = Res1.MXCSR;
4580 TestData.OutVal = Res1.uResult;
4581 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4582
4583 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4584 {
4585 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4586 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4587 IEMSSERESULT Res2; RT_ZERO(Res2);
4588 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
4589 TestData.fMxcsrIn = State.MXCSR;
4590 TestData.fMxcsrOut = Res2.MXCSR;
4591 TestData.OutVal = Res2.uResult;
4592 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4593 }
4594 if (!RT_IS_POWER_OF_TWO(fXcpt))
4595 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4596 if (fUnmasked & fXcpt)
4597 {
4598 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4599 IEMSSERESULT Res3; RT_ZERO(Res3);
4600 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
4601 TestData.fMxcsrIn = State.MXCSR;
4602 TestData.fMxcsrOut = Res3.MXCSR;
4603 TestData.OutVal = Res3.uResult;
4604 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4605 }
4606 }
4607 }
4608 }
4609 rc = RTStrmClose(pStrmOut);
4610 if (RT_FAILURE(rc))
4611 {
4612 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4613 return RTEXITCODE_FAILURE;
4614 }
4615 }
4616
4617 return RTEXITCODE_SUCCESS;
4618}
4619#endif
4620
/**
 * Runs the pre-generated tests for the packed single precision SSE workers,
 * comparing the output MXCSR and all four R32 lanes against the recorded
 * values (exact match required).
 */
static void SseBinaryR32Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32[iFn].pszName))
            continue;

        /* NOTE(review): for the binary-data subtests *pcTests appears to hold a byte
           count - hence the division by sizeof(SSE_BINARY_TEST_T) below - confirm. */
        uint32_t const cTests = *g_aSseBinaryR32[iFn].pcTests;
        SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
        PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]); /* 2 when a native variant exists, else 1. */
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
            {
                IEMSSERESULT Res; RT_ZERO(Res);

                State.MXCSR = paTests[iTest].fMxcsrIn;
                pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
                bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
                                   && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
                                   && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
                                   && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
                if ( Res.MXCSR != paTests[iTest].fMxcsrOut
                    || !fValsIdentical)
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
                                          "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
                                          "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
                                 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
                                 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
                                 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
                                 iVar ? "  " : "", Res.MXCSR,
                                 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
                                 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
                                 iVar ? "  " : "", paTests[iTest].fMxcsrOut,
                                 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
                                 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
                                 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
                                 !fValsIdentical ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn) );
            }
            pfn = g_aSseBinaryR32[iFn].pfnNative; /* 2nd pass (if any) checks the native worker. */
        }
    }
}
4671
4672
4673/*
4674 * Binary SSE operations on packed single precision floating point values.
4675 */
4676TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4677
4678static const SSE_BINARY_R64_T g_aSseBinaryR64[] =
4679{
4680 ENTRY_BIN(addpd_u128),
4681 ENTRY_BIN(mulpd_u128),
4682 ENTRY_BIN(subpd_u128),
4683 ENTRY_BIN(minpd_u128),
4684 ENTRY_BIN(divpd_u128),
4685 ENTRY_BIN(maxpd_u128),
4686 ENTRY_BIN(haddpd_u128),
4687 ENTRY_BIN(hsubpd_u128),
4688 ENTRY_BIN(sqrtpd_u128),
4689 ENTRY_BIN(addsubpd_u128),
4690 ENTRY_BIN(cvtpd2ps_u128),
4691};
4692
4693#ifdef TSTIEMAIMPL_WITH_GENERATOR
4694static RTEXITCODE SseBinaryR64Generate(const char *pszDataFileFmt, uint32_t cTests)
4695{
4696 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4697
4698 static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
4699 {
4700 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
4701 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
4702 /** @todo More specials. */
4703 };
4704
4705 X86FXSTATE State;
4706 RT_ZERO(State);
4707 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4708 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
4709 {
4710 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;
4711
4712 PRTSTREAM pStrmOut = NULL;
4713 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64[iFn].pszName);
4714 if (RT_FAILURE(rc))
4715 {
4716 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
4717 return RTEXITCODE_FAILURE;
4718 }
4719
4720 uint32_t cNormalInputPairs = 0;
4721 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4722 {
4723 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
4724
4725 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4726 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4727 TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4728 TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4729
4730 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
4731 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
4732 cNormalInputPairs++;
4733 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4734 {
4735 iTest -= 1;
4736 continue;
4737 }
4738
4739 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4740 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4741 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4742 for (uint8_t iFz = 0; iFz < 2; iFz++)
4743 {
4744 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4745 | (iRounding << X86_MXCSR_RC_SHIFT)
4746 | (iDaz ? X86_MXCSR_DAZ : 0)
4747 | (iFz ? X86_MXCSR_FZ : 0)
4748 | X86_MXCSR_XCPT_MASK;
4749 IEMSSERESULT ResM; RT_ZERO(ResM);
4750 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
4751 TestData.fMxcsrIn = State.MXCSR;
4752 TestData.fMxcsrOut = ResM.MXCSR;
4753 TestData.OutVal = ResM.uResult;
4754 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4755
4756 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4757 IEMSSERESULT ResU; RT_ZERO(ResU);
4758 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
4759 TestData.fMxcsrIn = State.MXCSR;
4760 TestData.fMxcsrOut = ResU.MXCSR;
4761 TestData.OutVal = ResU.uResult;
4762 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4763
4764 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4765 if (fXcpt)
4766 {
4767 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
4768 IEMSSERESULT Res1; RT_ZERO(Res1);
4769 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
4770 TestData.fMxcsrIn = State.MXCSR;
4771 TestData.fMxcsrOut = Res1.MXCSR;
4772 TestData.OutVal = Res1.uResult;
4773 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4774
4775 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4776 {
4777 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4778 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4779 IEMSSERESULT Res2; RT_ZERO(Res2);
4780 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
4781 TestData.fMxcsrIn = State.MXCSR;
4782 TestData.fMxcsrOut = Res2.MXCSR;
4783 TestData.OutVal = Res2.uResult;
4784 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4785 }
4786 if (!RT_IS_POWER_OF_TWO(fXcpt))
4787 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4788 if (fUnmasked & fXcpt)
4789 {
4790 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4791 IEMSSERESULT Res3; RT_ZERO(Res3);
4792 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
4793 TestData.fMxcsrIn = State.MXCSR;
4794 TestData.fMxcsrOut = Res3.MXCSR;
4795 TestData.OutVal = Res3.uResult;
4796 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4797 }
4798 }
4799 }
4800 }
4801 rc = RTStrmClose(pStrmOut);
4802 if (RT_FAILURE(rc))
4803 {
4804 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
4805 return RTEXITCODE_FAILURE;
4806 }
4807 }
4808
4809 return RTEXITCODE_SUCCESS;
4810}
4811#endif
4812
4813
/**
 * Tests the binary packed double-precision SSE workers in g_aSseBinaryR64
 * against the pre-generated test data.
 *
 * For each record the worker is fed the recorded MXCSR and the two 128-bit
 * inputs; the resulting MXCSR and both 64-bit result lanes must match the
 * recorded output bit-for-bit (RTFLOAT64U_ARE_IDENTICAL, so NaN encodings
 * count too).  When a native variation exists it is tested in a second pass.
 */
static void SseBinaryR64Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aSseBinaryR64[iFn].pcTests; /* byte size of the data blob (divided by the record size below) */
        SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
        PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
            {
                IEMSSERESULT Res; RT_ZERO(Res);

                State.MXCSR = paTests[iTest].fMxcsrIn;
                pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
                /* MXCSR and both result lanes must match exactly. */
                if (   Res.MXCSR != paTests[iTest].fMxcsrOut
                    || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
                    || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
                                          "%s -> mxcsr=%#08x %s'%s\n"
                                          "%s expected %#08x %s'%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
                                 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
                                 iVar ? "  " : "", Res.MXCSR,
                                 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
                                 iVar ? "  " : "", paTests[iTest].fMxcsrOut,
                                 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
                                 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
                                 (   !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
                                  || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn) );
            }
            pfn = g_aSseBinaryR64[iFn].pfnNative; /* second variation runs the native worker */
        }
    }
}
4859
4860
4861/*
 * Binary SSE operations on scalar single precision floating point values (xxxss xmm1, r/m32).
4863 */
/* Subtest descriptor type for workers taking a 128-bit register and a 32-bit memory operand. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);

/* Scalar single-precision workers (xxxss xmm1, r/m32 forms) with binary test data files. */
static const SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
{
    ENTRY_BIN(addss_u128_r32),
    ENTRY_BIN(mulss_u128_r32),
    ENTRY_BIN(subss_u128_r32),
    ENTRY_BIN(minss_u128_r32),
    ENTRY_BIN(divss_u128_r32),
    ENTRY_BIN(maxss_u128_r32),
    ENTRY_BIN(cvtss2sd_u128_r32),
    ENTRY_BIN(sqrtss_u128_r32),
};
4877
4878#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the scalar single-precision SSE workers in
 * g_aSseBinaryU128R32 (addss, mulss, ..., i.e. the xmm1, r/m32 forms).
 *
 * One binary data file per worker is produced via @a pszDataFileFmt (a single
 * %s taking the worker name), holding SSE_BINARY_U128_R32_TEST_T records.
 * Every input set is run through all four rounding modes and all DAZ/FZ
 * combinations; when exception flags are raised, additional records probe
 * behaviour with exceptions unmasked.
 *
 * @returns RTEXITCODE_SUCCESS / RTEXITCODE_FAILURE (error already displayed).
 * @param   pszDataFileFmt  Format string producing the data file names.
 * @param   cTests          Number of random input sets to generate (min 192).
 */
static RTEXITCODE SseBinaryU128R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4; /* guarantee a minimum number of all-normal input sets */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
    {
        /* Generate the reference data with the native (assembly) worker when available. */
        PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative ? g_aSseBinaryU128R32[iFn].pfnNative : g_aSseBinaryU128R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the hardcoded specials. */
            TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
            TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];

            TestData.r32Val2 = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].Val2;

            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
                && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Running low on all-normal sets near the end of the budget: redo this slot. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM; RT_ZERO(ResM);
                        pfn(&State, &ResM, &TestData.InVal1, &TestData.r32Val2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResM.MXCSR;
                        TestData.OutVal    = ResM.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU; RT_ZERO(ResU);
                        pfn(&State, &ResU, &TestData.InVal1, &TestData.r32Val2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResU.MXCSR;
                        TestData.OutVal    = ResU.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: the flags that fired are set on input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            IEMSSERESULT Res1; RT_ZERO(Res1);
                            pfn(&State, &Res1, &TestData.InVal1, &TestData.r32Val2);
                            TestData.fMxcsrIn  = State.MXCSR;
                            TestData.fMxcsrOut = Res1.MXCSR;
                            TestData.OutVal    = Res1.uResult;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* If new exception flags appeared, mask exactly the accumulated set and rerun. */
                            if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                IEMSSERESULT Res2; RT_ZERO(Res2);
                                pfn(&State, &Res2, &TestData.InVal1, &TestData.r32Val2);
                                TestData.fMxcsrIn  = State.MXCSR;
                                TestData.fMxcsrOut = Res2.MXCSR;
                                TestData.OutVal    = Res2.uResult;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* When several exceptions fired, probe each one individually unmasked. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        IEMSSERESULT Res3; RT_ZERO(Res3);
                                        pfn(&State, &Res3, &TestData.InVal1, &TestData.r32Val2);
                                        TestData.fMxcsrIn  = State.MXCSR;
                                        TestData.fMxcsrOut = Res3.MXCSR;
                                        TestData.OutVal    = Res3.uResult;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
5000#endif
5001
5002static void SseBinaryU128R32Test(void)
5003{
5004 X86FXSTATE State;
5005 RT_ZERO(State);
5006 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5007 {
5008 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R32[iFn].pszName))
5009 continue;
5010
5011 uint32_t const cTests = *g_aSseBinaryU128R32[iFn].pcTests;
5012 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
5013 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
5014 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
5015 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5016 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5017 {
5018 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
5019 {
5020 IEMSSERESULT Res; RT_ZERO(Res);
5021
5022 State.MXCSR = paTests[iTest].fMxcsrIn;
5023 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
5024 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5025 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5026 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5027 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5028 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5029 || !fValsIdentical)
5030 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
5031 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5032 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5033 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5034 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5035 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5036 FormatR32(&paTests[iTest].r32Val2),
5037 iVar ? " " : "", Res.MXCSR,
5038 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5039 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5040 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5041 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5042 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5043 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5044 !fValsIdentical ? " - val" : "",
5045 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5046 }
5047 }
5048 }
5049}
5050
5051
5052/*
 * Binary SSE operations on scalar double precision floating point values (xxxsd xmm1, r/m64).
5054 */
/* Subtest descriptor type for workers taking a 128-bit register and a 64-bit memory operand. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);

/* Scalar double-precision workers (xxxsd xmm1, r/m64 forms) with binary test data files. */
static const SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
{
    ENTRY_BIN(addsd_u128_r64),
    ENTRY_BIN(mulsd_u128_r64),
    ENTRY_BIN(subsd_u128_r64),
    ENTRY_BIN(minsd_u128_r64),
    ENTRY_BIN(divsd_u128_r64),
    ENTRY_BIN(maxsd_u128_r64),
    ENTRY_BIN(cvtsd2ss_u128_r64),
    ENTRY_BIN(sqrtsd_u128_r64),
};
5068
5069#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the scalar double-precision SSE workers in
 * g_aSseBinaryU128R64 (addsd, mulsd, ..., i.e. the xmm1, r/m64 forms).
 *
 * One binary data file per worker is produced via @a pszDataFileFmt (a single
 * %s taking the worker name), holding SSE_BINARY_U128_R64_TEST_T records.
 * Every input set is run through all four rounding modes and all DAZ/FZ
 * combinations; when exception flags are raised, additional records probe
 * behaviour with exceptions unmasked.
 *
 * @returns RTEXITCODE_SUCCESS / RTEXITCODE_FAILURE (error already displayed).
 * @param   pszDataFileFmt  Format string producing the data file names.
 * @param   cTests          Number of random input sets to generate (min 192).
 */
static RTEXITCODE SseBinaryU128R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4; /* guarantee a minimum number of all-normal input sets */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
    {
        /* Generate the reference data with the native (assembly) worker when available. */
        PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative ? g_aSseBinaryU128R64[iFn].pfnNative : g_aSseBinaryU128R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the hardcoded specials. */
            TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.r64Val2 = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].Val2;

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Running low on all-normal sets near the end of the budget: redo this slot. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM; RT_ZERO(ResM);
                        pfn(&State, &ResM, &TestData.InVal1, &TestData.r64Val2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResM.MXCSR;
                        TestData.OutVal    = ResM.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU; RT_ZERO(ResU);
                        pfn(&State, &ResU, &TestData.InVal1, &TestData.r64Val2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResU.MXCSR;
                        TestData.OutVal    = ResU.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: the flags that fired are set on input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            IEMSSERESULT Res1; RT_ZERO(Res1);
                            pfn(&State, &Res1, &TestData.InVal1, &TestData.r64Val2);
                            TestData.fMxcsrIn  = State.MXCSR;
                            TestData.fMxcsrOut = Res1.MXCSR;
                            TestData.OutVal    = Res1.uResult;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* If new exception flags appeared, mask exactly the accumulated set and rerun. */
                            if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                IEMSSERESULT Res2; RT_ZERO(Res2);
                                pfn(&State, &Res2, &TestData.InVal1, &TestData.r64Val2);
                                TestData.fMxcsrIn  = State.MXCSR;
                                TestData.fMxcsrOut = Res2.MXCSR;
                                TestData.OutVal    = Res2.uResult;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* When several exceptions fired, probe each one individually unmasked. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        IEMSSERESULT Res3; RT_ZERO(Res3);
                                        pfn(&State, &Res3, &TestData.InVal1, &TestData.r64Val2);
                                        TestData.fMxcsrIn  = State.MXCSR;
                                        TestData.fMxcsrOut = Res3.MXCSR;
                                        TestData.OutVal    = Res3.uResult;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
5185#endif
5186
5187
5188static void SseBinaryU128R64Test(void)
5189{
5190 X86FXSTATE State;
5191 RT_ZERO(State);
5192 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5193 {
5194 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R64[iFn].pszName))
5195 continue;
5196
5197 uint32_t const cTests = *g_aSseBinaryU128R64[iFn].pcTests;
5198 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
5199 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
5200 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
5201 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5202 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5203 {
5204 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_U128_R64_TEST_T); iTest++)
5205 {
5206 IEMSSERESULT Res; RT_ZERO(Res);
5207
5208 State.MXCSR = paTests[iTest].fMxcsrIn;
5209 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
5210 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5211 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5212 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5213 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
5214 "%s -> mxcsr=%#08x %s'%s\n"
5215 "%s expected %#08x %s'%s%s%s (%s)\n",
5216 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5217 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5218 FormatR64(&paTests[iTest].r64Val2),
5219 iVar ? " " : "", Res.MXCSR,
5220 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5221 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5222 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5223 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5224 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5225 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5226 ? " - val" : "",
5227 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5228 }
5229 }
5230 }
5231}
5232
5233
5234
5235int main(int argc, char **argv)
5236{
5237 int rc = RTR3InitExe(argc, &argv, 0);
5238 if (RT_FAILURE(rc))
5239 return RTMsgInitFailure(rc);
5240
5241 /*
     * Determine the host CPU.
5243 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
5244 */
5245#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
5246 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
5247 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
5248 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
5249#else
5250 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
5251#endif
5252
5253 /*
5254 * Parse arguments.
5255 */
5256 enum { kModeNotSet, kModeTest, kModeGenerate }
5257 enmMode = kModeNotSet;
5258 bool fInt = true;
5259 bool fFpuLdSt = true;
5260 bool fFpuBinary1 = true;
5261 bool fFpuBinary2 = true;
5262 bool fFpuOther = true;
5263 bool fCpuData = true;
5264 bool fCommonData = true;
5265 bool fSseFpBinary = true;
5266 uint32_t const cDefaultTests = 96;
5267 uint32_t cTests = cDefaultTests;
5268 RTGETOPTDEF const s_aOptions[] =
5269 {
5270 // mode:
5271 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
5272 { "--test", 't', RTGETOPT_REQ_NOTHING },
5273 // test selection (both)
5274 { "--all", 'a', RTGETOPT_REQ_NOTHING },
5275 { "--none", 'z', RTGETOPT_REQ_NOTHING },
5276 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
5277 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
5278 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
5279 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
5280 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
5281 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
5282 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
5283 { "--int", 'i', RTGETOPT_REQ_NOTHING },
5284 { "--include", 'I', RTGETOPT_REQ_STRING },
5285 { "--exclude", 'X', RTGETOPT_REQ_STRING },
5286 // generation parameters
5287 { "--common", 'm', RTGETOPT_REQ_NOTHING },
5288 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
5289 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
5290 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
5291 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
5292 };
5293
5294 RTGETOPTSTATE State;
5295 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
5296 AssertRCReturn(rc, RTEXITCODE_FAILURE);
5297
5298 RTGETOPTUNION ValueUnion;
5299 while ((rc = RTGetOpt(&State, &ValueUnion)))
5300 {
5301 switch (rc)
5302 {
5303 case 'g':
5304 enmMode = kModeGenerate;
5305 break;
5306 case 't':
5307 enmMode = kModeTest;
5308 break;
5309
5310 case 'a':
5311 fCpuData = true;
5312 fCommonData = true;
5313 fInt = true;
5314 fFpuLdSt = true;
5315 fFpuBinary1 = true;
5316 fFpuBinary2 = true;
5317 fFpuOther = true;
5318 fSseFpBinary = true;
5319 break;
5320 case 'z':
5321 fCpuData = false;
5322 fCommonData = false;
5323 fInt = false;
5324 fFpuLdSt = false;
5325 fFpuBinary1 = false;
5326 fFpuBinary2 = false;
5327 fFpuOther = false;
5328 fSseFpBinary = false;
5329 break;
5330
5331 case 'F':
5332 fFpuLdSt = true;
5333 break;
5334 case 'O':
5335 fFpuOther = true;
5336 break;
5337 case 'B':
5338 fFpuBinary1 = true;
5339 break;
5340 case 'P':
5341 fFpuBinary2 = true;
5342 break;
5343 case 'S':
5344 fSseFpBinary = true;
5345 break;
5346 case 'i':
5347 fInt = true;
5348 break;
5349
5350 case 'I':
5351 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
5352 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
5353 RT_ELEMENTS(g_apszIncludeTestPatterns));
5354 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
5355 break;
5356 case 'X':
5357 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
5358 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
5359 RT_ELEMENTS(g_apszExcludeTestPatterns));
5360 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
5361 break;
5362
5363 case 'm':
5364 fCommonData = true;
5365 break;
5366 case 'c':
5367 fCpuData = true;
5368 break;
5369 case 'n':
5370 cTests = ValueUnion.u32;
5371 break;
5372
5373 case 'q':
5374 g_cVerbosity = 0;
5375 break;
5376 case 'v':
5377 g_cVerbosity++;
5378 break;
5379
5380 case 'h':
5381 RTPrintf("usage: %s <-g|-t> [options]\n"
5382 "\n"
5383 "Mode:\n"
5384 " -g, --generate\n"
5385 " Generate test data.\n"
5386 " -t, --test\n"
5387 " Execute tests.\n"
5388 "\n"
5389 "Test selection (both modes):\n"
5390 " -a, --all\n"
5391 " Enable all tests and generated test data. (default)\n"
5392 " -z, --zap, --none\n"
5393 " Disable all tests and test data types.\n"
5394 " -i, --int\n"
5395 " Enable non-FPU tests.\n"
5396 " -F, --fpu-ld-st\n"
5397 " Enable FPU load and store tests.\n"
5398 " -B, --fpu-binary-1\n"
5399 " Enable FPU binary 80-bit FP tests.\n"
5400 " -P, --fpu-binary-2\n"
5401 " Enable FPU binary 64- and 32-bit FP tests.\n"
5402 " -O, --fpu-other\n"
5403 " Enable FPU binary 64- and 32-bit FP tests.\n"
5404 " -S, --sse-fp-binary\n"
5405 " Enable SSE binary 64- and 32-bit FP tests.\n"
5406 " -I,--include=<test-patter>\n"
5407 " Enable tests matching the given pattern.\n"
5408 " -X,--exclude=<test-patter>\n"
5409 " Skip tests matching the given pattern (overrides --include).\n"
5410 "\n"
5411 "Generation:\n"
5412 " -m, --common\n"
5413 " Enable generating common test data.\n"
5414 " -c, --only-cpu\n"
5415 " Enable generating CPU specific test data.\n"
5416 " -n, --number-of-test <count>\n"
5417 " Number of tests to generate. Default: %u\n"
5418 "\n"
5419 "Other:\n"
5420 " -v, --verbose\n"
5421 " -q, --quiet\n"
5422 " Noise level. Default: --quiet\n"
5423 , argv[0], cDefaultTests);
5424 return RTEXITCODE_SUCCESS;
5425 default:
5426 return RTGetOptPrintError(rc, &ValueUnion);
5427 }
5428 }
5429
5430 /*
5431 * Generate data?
5432 */
5433 if (enmMode == kModeGenerate)
5434 {
5435#ifdef TSTIEMAIMPL_WITH_GENERATOR
5436 char szCpuDesc[256] = {0};
5437 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
5438 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
5439# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
5440 const char * const pszBitBucket = "NUL";
5441# else
5442 const char * const pszBitBucket = "/dev/null";
5443# endif
5444
5445 if (cTests == 0)
5446 cTests = cDefaultTests;
5447 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
5448 g_cZeroSrcTests = g_cZeroDstTests * 2;
5449
5450 if (fInt)
5451 {
5452 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
5453 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5454 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5455 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
5456 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5457 if (!pStrmData || !pStrmDataCpu)
5458 return RTEXITCODE_FAILURE;
5459
5460 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
5461 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
5462 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
5463 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
5464 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
5465 UnaryGenerate(pStrmData, cTests);
5466 ShiftGenerate(pStrmDataCpu, cTests);
5467 MulDivGenerate(pStrmDataCpu, cTests);
5468
5469 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5470 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5471 if (rcExit != RTEXITCODE_SUCCESS)
5472 return rcExit;
5473 }
5474
5475 if (fFpuLdSt)
5476 {
5477 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
5478 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5479 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5480 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
5481 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5482 if (!pStrmData || !pStrmDataCpu)
5483 return RTEXITCODE_FAILURE;
5484
5485 FpuLdConstGenerate(pStrmData, cTests);
5486 FpuLdIntGenerate(pStrmData, cTests);
5487 FpuLdD80Generate(pStrmData, cTests);
5488 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
5489 FpuStD80Generate(pStrmData, cTests);
5490 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
5491 FpuLdMemGenerate(pStrmData, cTests2);
5492 FpuStMemGenerate(pStrmData, cTests2);
5493
5494 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5495 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5496 if (rcExit != RTEXITCODE_SUCCESS)
5497 return rcExit;
5498 }
5499
5500 if (fFpuBinary1)
5501 {
5502 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
5503 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5504 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5505 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
5506 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5507 if (!pStrmData || !pStrmDataCpu)
5508 return RTEXITCODE_FAILURE;
5509
5510 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
5511 FpuBinaryFswR80Generate(pStrmData, cTests);
5512 FpuBinaryEflR80Generate(pStrmData, cTests);
5513
5514 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5515 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5516 if (rcExit != RTEXITCODE_SUCCESS)
5517 return rcExit;
5518 }
5519
5520 if (fFpuBinary2)
5521 {
5522 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
5523 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5524 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5525 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
5526 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5527 if (!pStrmData || !pStrmDataCpu)
5528 return RTEXITCODE_FAILURE;
5529
5530 FpuBinaryR64Generate(pStrmData, cTests);
5531 FpuBinaryR32Generate(pStrmData, cTests);
5532 FpuBinaryI32Generate(pStrmData, cTests);
5533 FpuBinaryI16Generate(pStrmData, cTests);
5534 FpuBinaryFswR64Generate(pStrmData, cTests);
5535 FpuBinaryFswR32Generate(pStrmData, cTests);
5536 FpuBinaryFswI32Generate(pStrmData, cTests);
5537 FpuBinaryFswI16Generate(pStrmData, cTests);
5538
5539 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5540 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5541 if (rcExit != RTEXITCODE_SUCCESS)
5542 return rcExit;
5543 }
5544
5545 if (fFpuOther)
5546 {
5547 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
5548 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5549 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5550 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
5551 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5552 if (!pStrmData || !pStrmDataCpu)
5553 return RTEXITCODE_FAILURE;
5554
5555 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
5556 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
5557 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
5558
5559 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5560 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5561 if (rcExit != RTEXITCODE_SUCCESS)
5562 return rcExit;
5563 }
5564
5565 if (fSseFpBinary)
5566 {
5567 const char *pszDataFileFmt = fCommonData ? "tstIEMAImplDataSseBinary-%s.bin" : pszBitBucket;
5568
5569 RTEXITCODE rcExit = SseBinaryR32Generate(pszDataFileFmt, cTests);
5570 if (rcExit == RTEXITCODE_SUCCESS)
5571 rcExit = SseBinaryR64Generate(pszDataFileFmt, cTests);
5572 if (rcExit == RTEXITCODE_SUCCESS)
5573 rcExit = SseBinaryU128R32Generate(pszDataFileFmt, cTests);
5574 if (rcExit == RTEXITCODE_SUCCESS)
5575 rcExit = SseBinaryU128R64Generate(pszDataFileFmt, cTests);
5576 if (rcExit != RTEXITCODE_SUCCESS)
5577 return rcExit;
5578 }
5579
5580 return RTEXITCODE_SUCCESS;
5581#else
5582 return RTMsgErrorExitFailure("Test data generator not compiled in!");
5583#endif
5584 }
5585
5586 /*
5587      * Do testing. Currently disabled by default as data needs to be checked
5588      * on both Intel and AMD systems first.
5589 */
5590 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
5591 AssertRCReturn(rc, RTEXITCODE_FAILURE);
5592 if (enmMode == kModeTest)
5593 {
5594 RTTestBanner(g_hTest);
5595
5596 /* Allocate guarded memory for use in the tests. */
5597#define ALLOC_GUARDED_VAR(a_puVar) do { \
5598 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
5599 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
5600 } while (0)
5601 ALLOC_GUARDED_VAR(g_pu8);
5602 ALLOC_GUARDED_VAR(g_pu16);
5603 ALLOC_GUARDED_VAR(g_pu32);
5604 ALLOC_GUARDED_VAR(g_pu64);
5605 ALLOC_GUARDED_VAR(g_pu128);
5606 ALLOC_GUARDED_VAR(g_pu8Two);
5607 ALLOC_GUARDED_VAR(g_pu16Two);
5608 ALLOC_GUARDED_VAR(g_pu32Two);
5609 ALLOC_GUARDED_VAR(g_pu64Two);
5610 ALLOC_GUARDED_VAR(g_pu128Two);
5611 ALLOC_GUARDED_VAR(g_pfEfl);
5612 if (RTTestErrorCount(g_hTest) == 0)
5613 {
5614 if (fInt)
5615 {
5616 BinU8Test();
5617 BinU16Test();
5618 BinU32Test();
5619 BinU64Test();
5620 XchgTest();
5621 XaddTest();
5622 CmpXchgTest();
5623 CmpXchg8bTest();
5624 CmpXchg16bTest();
5625 ShiftDblTest();
5626 UnaryTest();
5627 ShiftTest();
5628 MulDivTest();
5629 BswapTest();
5630 }
5631
5632 if (fFpuLdSt)
5633 {
5634 FpuLoadConstTest();
5635 FpuLdMemTest();
5636 FpuLdIntTest();
5637 FpuLdD80Test();
5638 FpuStMemTest();
5639 FpuStIntTest();
5640 FpuStD80Test();
5641 }
5642
5643 if (fFpuBinary1)
5644 {
5645 FpuBinaryR80Test();
5646 FpuBinaryFswR80Test();
5647 FpuBinaryEflR80Test();
5648 }
5649
5650 if (fFpuBinary2)
5651 {
5652 FpuBinaryR64Test();
5653 FpuBinaryR32Test();
5654 FpuBinaryI32Test();
5655 FpuBinaryI16Test();
5656 FpuBinaryFswR64Test();
5657 FpuBinaryFswR32Test();
5658 FpuBinaryFswI32Test();
5659 FpuBinaryFswI16Test();
5660 }
5661
5662 if (fFpuOther)
5663 {
5664 FpuUnaryR80Test();
5665 FpuUnaryFswR80Test();
5666 FpuUnaryTwoR80Test();
5667 }
5668
5669 if (fSseFpBinary)
5670 {
5671 SseBinaryR32Test();
5672 SseBinaryR64Test();
5673 SseBinaryU128R32Test();
5674 SseBinaryU128R64Test();
5675 }
5676 }
5677 return RTTestSummaryAndDestroy(g_hTest);
5678 }
5679 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
5680}
5681
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette