VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp

Last change on this file was 104269, checked in by vboxsync, 2 months ago

VMM/IEM: Rework pcmpistri emulation to pass the new ECX value as return argument freeing up one argument which can be used to pass both source operands by reference getting rid of IEMPCMPISTRXSRC for this. This enables recompilation of pcmpistri which is used by Linux a fair bit, bugref:10641

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 488.6 KB
Line 
1/* $Id: tstIEMAImpl.cpp 104269 2024-04-10 09:42:20Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include "../include/IEMInternal.h"
33
34#include <iprt/errcore.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/buildconfig.h>
38#include <iprt/ctype.h>
39#include <iprt/err.h>
40#include <iprt/getopt.h>
41#include <iprt/initterm.h>
42#include <iprt/file.h>
43#include <iprt/mem.h>
44#include <iprt/message.h>
45#include <iprt/mp.h>
46#include <iprt/rand.h>
47#include <iprt/stream.h>
48#include <iprt/string.h>
49#include <iprt/test.h>
50#include <iprt/time.h>
51#include <iprt/thread.h>
52#include <iprt/vfs.h>
53#include <iprt/zip.h>
54#include <VBox/version.h>
55
56#include "tstIEMAImpl.h"
57
58
59/*********************************************************************************************************************************
60* Defined Constants And Macros *
61*********************************************************************************************************************************/
/** @name Test table entry initializer macros.
 *
 * These build the initializer for one subtest table entry (see
 * TYPEDEF_SUBTEST_TYPE below for the field layout): name string, worker
 * function pointer, optional native reference function, compressed test data
 * blob + size, extra info, EFLAGS flavour, and (for the _FIX variants when
 * generating) a fixed test array.
 * @{ */

/** Entry with fixed tests, no extra info. */
#define ENTRY_BIN_FIX(a_Name)       ENTRY_BIN_FIX_EX(a_Name, 0)
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Entry with fixed tests and extra info; only carries the fixed test array
 *  when the generator is compiled in. */
# define ENTRY_BIN_FIX_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, \
      RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
#else
# define ENTRY_BIN_FIX_EX(a_Name, a_uExtra) ENTRY_BIN_EX(a_Name, a_uExtra)
#endif

/** Entry whose worker needs a cast to the table's function pointer type. */
#define ENTRY_BIN_PFN_CAST(a_Name, a_pfnType)  ENTRY_BIN_PFN_CAST_EX(a_Name, a_pfnType, 0)
#define ENTRY_BIN_PFN_CAST_EX(a_Name, a_pfnType, a_uExtra) \
    { RT_XSTR(a_Name), (a_pfnType)iemAImpl_ ## a_Name, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }

/** Plain entry. */
#define ENTRY_BIN(a_Name)      ENTRY_BIN_EX(a_Name, 0)
#define ENTRY_BIN_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }

/** AVX entry: uses the _fallback implementation when no assembly helpers. */
#define ENTRY_BIN_AVX(a_Name)       ENTRY_BIN_AVX_EX(a_Name, 0)
#ifndef IEM_WITHOUT_ASSEMBLY
# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#else
# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#endif

/** SSE entry with optional assembly: same fallback scheme as the AVX one. */
#define ENTRY_BIN_SSE_OPT(a_Name)      ENTRY_BIN_SSE_OPT_EX(a_Name, 0)
#ifndef IEM_WITHOUT_ASSEMBLY
# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#else
# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
      g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
#endif

/** Intel-flavoured entry; the plain implementation doubles as native ref. */
#define ENTRY_BIN_INTEL(a_Name, a_fEflUndef)     ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, 0)
#define ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
      g_abTests_ ## a_Name ## _intel, &g_cbTests_ ## a_Name ## _intel, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }

/** AMD-flavoured entry; the plain implementation doubles as native ref. */
#define ENTRY_BIN_AMD(a_Name, a_fEflUndef)     ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, 0)
#define ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
      g_abTests_ ## a_Name ## _amd, &g_cbTests_ ## a_Name ## _amd, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }

/** Intel-flavoured entry with fixed tests (generator builds only). */
#define ENTRY_BIN_FIX_INTEL(a_Name, a_fEflUndef)      ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, 0)
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
      g_abTests_ ## a_Name ## _intel, &g_cbTests_ ## a_Name ## _intel, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL, \
      RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
#else
# define ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, a_uExtra)
#endif

/** AMD-flavoured entry with fixed tests (generator builds only). */
#define ENTRY_BIN_FIX_AMD(a_Name, a_fEflUndef)      ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, 0)
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
      g_abTests_ ## a_Name ## _amd, &g_cbTests_ ## a_Name ## _amd, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD, \
      RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
#else
# define ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, a_uExtra) ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, a_uExtra)
#endif
/** @} */


/** Declares the subtest table entry structure for one test-value type.
 * The const head (initialized via the ENTRY_BIN* macros above) is followed by
 * mutable fields filled in when the compressed test data is expanded. */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char                  *pszName; \
        const a_FunctionPtrType     pfn; \
        const a_FunctionPtrType     pfnNative; \
        void const * const          pvCompressedTests; \
        uint32_t const              *pcbCompressedTests; \
        uint32_t const              uExtra; \
        uint8_t const               idxCpuEflFlavour; \
        uint16_t const              cFixedTests; \
        a_TestType const * const    paFixedTests; \
        a_TestType const            *paTests; /**< The decompressed info. */ \
        uint32_t                    cTests; /**< The decompressed info. */ \
        IEMTESTENTRYINFO            Info; \
    } a_TypeName

/** Number of variations to run for a subtest: 1, or 2 when a native
 *  reference implementation matches the host EFLAGS flavour. */
#define COUNT_VARIATIONS(a_SubTest) \
    (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
165
166
167/*********************************************************************************************************************************
168* Structures and Typedefs *
169*********************************************************************************************************************************/
/** Header at the start of each (uncompressed) binary test data stream. */
typedef struct IEMBINARYHEADER
{
    char        szMagic[16];    /**< IEMBINARYHEADER_MAGIC. */
    uint32_t    cbEntry;        /**< Size of one test entry, in bytes. */
    uint32_t    uSvnRev;        /**< SVN revision that generated the data. */
    uint32_t    auUnused[6];    /**< Padding / reserved. */
    char        szCpuDesc[80];  /**< Description of the generating CPU. */
} IEMBINARYHEADER;
AssertCompileSize(IEMBINARYHEADER, 128);

/* Magic is exactly 15 characters + terminator = 16 bytes. */
//                               01234567890123456
#define IEMBINARYHEADER_MAGIC "IEMAImpl Bin v1"
AssertCompile(sizeof(IEMBINARYHEADER_MAGIC) == 16);


/** Footer at the end of each (uncompressed) binary test data stream,
 *  allowing a consistency check of entry size and count. */
typedef struct IEMBINARYFOOTER
{
    char        szMagic[24];    /**< IEMBINARYFOOTER_MAGIC. */
    uint32_t    cbEntry;        /**< Size of one test entry, must match header. */
    uint32_t    cEntries;       /**< Number of test entries written. */
} IEMBINARYFOOTER;
AssertCompileSize(IEMBINARYFOOTER, 32);
/* Magic is exactly 23 characters + terminator = 24 bytes. */
//                               012345678901234567890123
#define IEMBINARYFOOTER_MAGIC "\nIEMAImpl Bin Footer v1"
AssertCompile(sizeof(IEMBINARYFOOTER_MAGIC) == 24);


/** Fixed part of TYPEDEF_SUBTEST_TYPE and friends. */
typedef struct IEMTESTENTRYINFO
{
    void       *pvUncompressed; /**< The decompressed test data blob. */
    uint32_t    cbUncompressed; /**< Size of the decompressed blob in bytes. */
    const char *pszCpuDesc;     /**< CPU description from the binary header. */
    uint32_t    uSvnRev;        /**< SVN revision from the binary header. */
} IEMTESTENTRYINFO;
205
206
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** State for writing one binary test data output file (generator builds). */
typedef struct IEMBINARYOUTPUT
{
    /** The output file. */
    RTVFSFILE       hVfsFile;
    /** The stream we write uncompressed binary test data to. */
    RTVFSIOSTREAM   hVfsUncompressed;
    /** The number of bytes written (ignoring write failures). */
    size_t          cbWritten;
    /** The entry size. */
    uint32_t        cbEntry;
    /** Write status. */
    int             rcWrite;
    /** Set if NULL. */
    bool            fNull;
    /** Set if we wrote a header and should write a footer as well. */
    bool            fWroteHeader;
    /** Filename. */
    char            szFilename[94];
} IEMBINARYOUTPUT;
/** Pointer to a binary output state. */
typedef IEMBINARYOUTPUT *PIEMBINARYOUTPUT;
#endif /* TSTIEMAIMPL_WITH_GENERATOR */
229
230
231/*********************************************************************************************************************************
232* Global Variables *
233*********************************************************************************************************************************/
/** The test instance handle. */
static RTTEST       g_hTest;
/** The EFLAGS behaviour flavour of the host CPU (Intel/AMD). */
static uint8_t      g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** How many of the leading generated tests force a zero destination. */
static uint32_t     g_cZeroDstTests = 2;
/** How many of the leading generated tests force a zero source. */
static uint32_t     g_cZeroSrcTests = 4;
#endif
/* Scratch value pointers used by the test workers.
   NOTE(review): allocation site is not in this chunk — presumably set up
   during init; confirm before assuming they are valid at any given point. */
static uint8_t     *g_pu8,   *g_pu8Two;
static uint16_t    *g_pu16,  *g_pu16Two;
static uint32_t    *g_pu32,  *g_pu32Two,  *g_pfEfl;
static uint64_t    *g_pu64,  *g_pu64Two;
static RTUINT128U  *g_pu128, *g_pu128Two;

/** Rotating formatting buffers used by the Format*/GenFormat* helpers. */
static char         g_aszBuf[32][256];
/** Index of the next rotating buffer to hand out (wraps modulo 32). */
static unsigned     g_idxBuf = 0;

/** Number of entries used in g_apszIncludeTestPatterns. */
static uint32_t     g_cIncludeTestPatterns;
/** Number of entries used in g_apszExcludeTestPatterns. */
static uint32_t     g_cExcludeTestPatterns;
/** Test name patterns to include (empty means include all). */
static const char  *g_apszIncludeTestPatterns[64];
/** Test name patterns to exclude. */
static const char  *g_apszExcludeTestPatterns[64];

/** Higher value, means longer benchmarking. */
static uint64_t     g_cPicoSecBenchmark = 0;

/** Output verbosity level. */
static unsigned     g_cVerbosity = 0;
/** Whether to report tests that are being skipped. */
static bool         g_fVerboseSkipping = true;
267
268
269/*********************************************************************************************************************************
270* Internal Functions *
271*********************************************************************************************************************************/
272static const char *FormatR80(PCRTFLOAT80U pr80);
273static const char *FormatR64(PCRTFLOAT64U pr64);
274static const char *FormatR32(PCRTFLOAT32U pr32);
275
276
277/*
278 * Random helpers.
279 */
280
281static uint32_t RandEFlags(void)
282{
283 uint32_t fEfl = RTRandU32();
284 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
285}
286
287#ifdef TSTIEMAIMPL_WITH_GENERATOR
288
289static uint8_t RandU8(void)
290{
291 return RTRandU32Ex(0, 0xff);
292}
293
294
295static uint16_t RandU16(void)
296{
297 return RTRandU32Ex(0, 0xffff);
298}
299
300
301static uint32_t RandU32(void)
302{
303 return RTRandU32();
304}
305
306#endif
307
308static uint64_t RandU64(void)
309{
310 return RTRandU64();
311}
312
313
314static RTUINT128U RandU128(void)
315{
316 RTUINT128U Ret;
317 Ret.s.Hi = RTRandU64();
318 Ret.s.Lo = RTRandU64();
319 return Ret;
320}
321
322#ifdef TSTIEMAIMPL_WITH_GENERATOR
323
324static uint8_t RandU8Dst(uint32_t iTest)
325{
326 if (iTest < g_cZeroDstTests)
327 return 0;
328 return RandU8();
329}
330
331
332static uint8_t RandU8Src(uint32_t iTest)
333{
334 if (iTest < g_cZeroSrcTests)
335 return 0;
336 return RandU8();
337}
338
339
340static uint16_t RandU16Dst(uint32_t iTest)
341{
342 if (iTest < g_cZeroDstTests)
343 return 0;
344 return RandU16();
345}
346
347
348static uint16_t RandU16Src(uint32_t iTest)
349{
350 if (iTest < g_cZeroSrcTests)
351 return 0;
352 return RandU16();
353}
354
355
356static uint32_t RandU32Dst(uint32_t iTest)
357{
358 if (iTest < g_cZeroDstTests)
359 return 0;
360 return RandU32();
361}
362
363
364static uint32_t RandU32Src(uint32_t iTest)
365{
366 if (iTest < g_cZeroSrcTests)
367 return 0;
368 return RandU32();
369}
370
371
372static uint64_t RandU64Dst(uint32_t iTest)
373{
374 if (iTest < g_cZeroDstTests)
375 return 0;
376 return RandU64();
377}
378
379
380static uint64_t RandU64Src(uint32_t iTest)
381{
382 if (iTest < g_cZeroSrcTests)
383 return 0;
384 return RandU64();
385}
386
387
388/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
389static int16_t RandI16Src2(uint32_t iTest)
390{
391 if (iTest < 18 * 4)
392 switch (iTest % 4)
393 {
394 case 0: return 0;
395 case 1: return INT16_MAX;
396 case 2: return INT16_MIN;
397 case 3: break;
398 }
399 return (int16_t)RandU16();
400}
401
402
403/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
404static int32_t RandI32Src2(uint32_t iTest)
405{
406 if (iTest < 18 * 4)
407 switch (iTest % 4)
408 {
409 case 0: return 0;
410 case 1: return INT32_MAX;
411 case 2: return INT32_MIN;
412 case 3: break;
413 }
414 return (int32_t)RandU32();
415}
416
417
418static int64_t RandI64Src(uint32_t iTest)
419{
420 RT_NOREF(iTest);
421 return (int64_t)RandU64();
422}
423
424
425static uint16_t RandFcw(void)
426{
427 return RandU16() & ~X86_FCW_ZERO_MASK;
428}
429
430
431static uint16_t RandFsw(void)
432{
433 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
434 return RandU16();
435}
436
437
438static uint32_t RandMxcsr(void)
439{
440 return RandU32() & ~X86_MXCSR_ZERO_MASK;
441}
442
443
444static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
445{
446 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
447 pr80->sj64.uFraction >>= cShift;
448 else
449 pr80->sj64.uFraction = (cShift % 19) + 1;
450}
451
452
453
/**
 * Produces a random 80-bit floating point value of the kind selected by
 * @a bType (masked to 0..31; see the if-ladder below for the encoding).
 *
 * @returns The generated value.
 * @param   bType       Value-kind selector: 0=zero, 1=pseudo-infinity,
 *                      2=infinity, 3=indefinite, 4/5=denormal,
 *                      6/7=pseudo-denormal, 8/9=pseudo-NaN, 10/11=quiet NaN,
 *                      12/13=signalling NaN, 14/15=unnormal, 16..25=normal
 *                      (16 targets rounding extremes), 26..31=raw random.
 * @param   cTarget     Bit width of the eventual conversion target (80, 64,
 *                      32, or 16/59 for integer targets); used to keep
 *                      normals within the target's exponent range.
 * @param   fIntTarget  Whether the conversion target is an integer type.
 */
static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
{
    Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));

    /* Start from fully random bits; the branches below then force the
       encoding of the requested value kind, keeping the random sign. */
    RTFLOAT80U r80;
    r80.au64[0] = RandU64();
    r80.au16[4] = RandU16();

    /*
     * Adjust the random stuff according to bType.
     */
    bType &= 0x1f;
    if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
    {
        /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
        r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
        r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
        r80.sj64.fInteger  = bType >= 2 ? 1 : 0;
        AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
        Assert(   bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
        AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* Denormals (4,5) and Pseudo denormals (6,7) */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && bType < 6)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0;
        r80.sj64.fInteger  = bType >= 6; /* integer bit set => pseudo denormal */
        AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(bType < 6  || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 8 || bType == 9)
    {
        /* Pseudo NaN. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        /* Preserve quiet/signalling-ness via bit 62, then clear the integer
           bit to make it a pseudo NaN. */
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 0;
        AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
    {
        /* Quiet and signalling NaNs. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (bType < 12)
            r80.sj64.uFraction |= RT_BIT_64(62);  /* quiet */
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
        r80.sj64.fInteger = 1;
        AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType < 12  || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 14 || bType == 15)
    {
        /* Unnormals */
        if (bType & 1)
            SafeR80FractionShift(&r80, RandU8() % 62);
        r80.sj64.fInteger = 0;
        /* Keep the exponent away from 0 and max so it stays an unnormal. */
        if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
            r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
        AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType < 26)
    {
        /* Make sure we have lots of normalized values. */
        if (!fIntTarget)
        {
            /* Clamp the exponent into the range representable by the
               floating point conversion target. */
            const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
                                   : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
            const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
                                   : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent <= uMinExp)
                r80.sj64.uExponent = uMinExp + 1;
            else if (r80.sj64.uExponent >= uMaxExp)
                r80.sj64.uExponent = uMaxExp - 1;

            if (bType == 16)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp - 1;
                else if ((bExp & 3) == 1)
                    r80.sj64.uExponent = uMinExp + 1;
                else if ((bExp & 3) == 2)
                    r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
            }
        }
        else
        {
            /* integer target: */
            const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
            const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent < uMinExp)
                r80.sj64.uExponent = uMinExp;
            else if (r80.sj64.uExponent > uMaxExp)
                r80.sj64.uExponent = uMaxExp;

            if (bType == 16)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp;
                else if ((bExp & 3) == 1)
                    r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
            }
        }

        AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    /* bType 26..31: leave the raw random bits as-is (any encoding). */
    return r80;
}
592
593
594static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
595{
596 /*
597 * Make it more likely that we get a good selection of special values.
598 */
599 return RandR80Ex(RandU8(), cTarget, fIntTarget);
600
601}
602
603
604static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
605{
606 /* Make sure we cover all the basic types first before going for random selection: */
607 if (iTest <= 18)
608 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
609 return RandR80(cTarget, fIntTarget);
610}
611
612
613/**
614 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
615 * to a 0..17, covering all basic value types.
616 */
617static uint8_t RandR80Src12RemapType(uint8_t bType)
618{
619 switch (bType)
620 {
621 case 0: return 18; /* normal */
622 case 1: return 16; /* normal extreme rounding */
623 case 2: return 14; /* unnormal */
624 case 3: return 12; /* Signalling NaN */
625 case 4: return 10; /* Quiet NaN */
626 case 5: return 8; /* PseudoNaN */
627 case 6: return 6; /* Pseudo Denormal */
628 case 7: return 4; /* Denormal */
629 case 8: return 3; /* Indefinite */
630 case 9: return 2; /* Infinity */
631 case 10: return 1; /* Pseudo-Infinity */
632 case 11: return 0; /* Zero */
633 default: AssertFailedReturn(18);
634 }
635}
636
637
638/**
639 * This works in tandem with RandR80Src2 to make sure we cover all operand
640 * type mixes first before we venture into regular random testing.
641 *
642 * There are 11 basic variations, when we leave out the five odd ones using
643 * SafeR80FractionShift. Because of the special normalized value targetting at
644 * rounding, we make it an even 12. So 144 combinations for two operands.
645 */
646static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
647{
648 if (cPartnerBits == 80)
649 {
650 Assert(!fPartnerInt);
651 if (iTest < 12 * 12)
652 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
653 }
654 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
655 {
656 if (iTest < 12 * 10)
657 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
658 }
659 else if (iTest < 18 * 4 && fPartnerInt)
660 return RandR80Ex(iTest / 4);
661 return RandR80();
662}
663
664
665/** Partner to RandR80Src1. */
666static RTFLOAT80U RandR80Src2(uint32_t iTest)
667{
668 if (iTest < 12 * 12)
669 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
670 return RandR80();
671}
672
673
674static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
675{
676 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
677 pr64->s64.uFraction >>= cShift;
678 else
679 pr64->s64.uFraction = (cShift % 19) + 1;
680}
681
682
/**
 * Produces a random 64-bit floating point value of the kind selected by
 * @a bType (masked to 0..15): 0=zero, 1=infinity, 2/3=subnormal,
 * 4/5=quiet NaN, 6/7=signalling NaN, 8..11=normal, 12..15=raw random bits.
 */
static RTFLOAT64U RandR64Ex(uint8_t bType)
{
    /* Start from fully random bits; the branches below force the encoding
       of the requested value kind, keeping the random sign. */
    RTFLOAT64U r64;
    r64.u = RandU64();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r64.s.uExponent     = bType == 0 ? 0 : 0x7ff;
        r64.s.uFractionHigh = 0;
        r64.s.uFractionLow  = 0;
        AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0;
        AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs */
        if (bType & 1)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0x7ff;
        if (bType < 6)
            r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);  /* quiet */
        else
            r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType < 6  || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r64.s.uExponent == 0)
            r64.s.uExponent = 1;
        else if (r64.s.uExponent == 0x7ff)
            r64.s.uExponent = 0x7fe;
        AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    /* bType 12..15: leave the raw random bits as-is (any encoding). */
    return r64;
}
739
740
741static RTFLOAT64U RandR64Src(uint32_t iTest)
742{
743 if (iTest < 16)
744 return RandR64Ex(iTest);
745 return RandR64Ex(RandU8());
746}
747
748
749/** Pairing with a 80-bit floating point arg. */
750static RTFLOAT64U RandR64Src2(uint32_t iTest)
751{
752 if (iTest < 12 * 10)
753 return RandR64Ex(9 - iTest % 10); /* start with normal values */
754 return RandR64Ex(RandU8());
755}
756
757
758static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
759{
760 if (pr32->s.uFraction >= RT_BIT_32(cShift))
761 pr32->s.uFraction >>= cShift;
762 else
763 pr32->s.uFraction = (cShift % 19) + 1;
764}
765
766
/**
 * Produces a random 32-bit floating point value of the kind selected by
 * @a bType (masked to 0..15): 0=zero, 1=infinity, 2/3=subnormal,
 * 4/5=quiet NaN, 6/7=signalling NaN, 8..11=normal, 12..15=raw random bits.
 */
static RTFLOAT32U RandR32Ex(uint8_t bType)
{
    /* Start from fully random bits; the branches below force the encoding
       of the requested value kind, keeping the random sign. */
    RTFLOAT32U r32;
    r32.u = RandU32();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r32.s.uExponent = bType == 0 ? 0 : 0xff;
        r32.s.uFraction = 0;
        AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
        AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0;
        AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs */
        if (bType & 1)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0xff;
        if (bType < 6)
            r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);  /* quiet */
        else
            r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(bType < 6  || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r32.s.uExponent == 0)
            r32.s.uExponent = 1;
        else if (r32.s.uExponent == 0xff)
            r32.s.uExponent = 0xfe;
        AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    /* bType 12..15: leave the raw random bits as-is (any encoding). */
    return r32;
}
822
823
824static RTFLOAT32U RandR32Src(uint32_t iTest)
825{
826 if (iTest < 16)
827 return RandR32Ex(iTest);
828 return RandR32Ex(RandU8());
829}
830
831
832/** Pairing with a 80-bit floating point arg. */
833static RTFLOAT32U RandR32Src2(uint32_t iTest)
834{
835 if (iTest < 12 * 10)
836 return RandR32Ex(9 - iTest % 10); /* start with normal values */
837 return RandR32Ex(RandU8());
838}
839
840
/**
 * Produces a packed BCD (80-bit) source operand.
 *
 * Tests 0..2 return +/- zero, tests 3..4 return the indefinite encoding;
 * after that 2 out of every 8 tests produce illegal (non-BCD) digit pairs
 * and the rest valid random BCD values.
 */
static RTPBCD80U RandD80Src(uint32_t iTest)
{
    if (iTest < 3)
    {
        /* Zero with alternating sign. */
        RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
        return d80Zero;
    }
    if (iTest < 5)
    {
        RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
        return d80Ind;
    }

    RTPBCD80U d80;
    uint8_t b = RandU8();
    d80.s.fSign = b & 1;

    if ((iTest & 7) >= 6)
    {
        /* Illegal: raw random bytes as digit pairs (not valid BCD), and for
           every 8th test also a random non-zero pad. */
        d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
            d80.s.abPairs[iPair] = RandU8();
    }
    else
    {
        /* Normal: each nibble is a decimal digit 0..9. */
        d80.s.uPad = 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
        {
            uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
            uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
            d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
        }
    }
    return d80;
}
878
879# if 0 /* unused */
880
/** Formats an 80-bit float as C initializer source code, preferring the
 *  RTFLOAT80U_INIT_* convenience macros for recognizable special values.
 *  Returns a pointer into the rotating g_aszBuf pool (or a string literal). */
static const char *GenFormatR80(PCRTFLOAT80U plrd)
{
    if (RTFLOAT80U_IS_ZERO(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
    if (RTFLOAT80U_IS_INF(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
    if (RTFLOAT80U_IS_INDEFINITE(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
    if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
    if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";

    /* Generic case: emit sign/mantissa/exponent explicitly. */
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
                plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
    return pszBuf;
}

/** Formats a 64-bit float as C initializer source code (rotating buffer). */
static const char *GenFormatR64(PCRTFLOAT64U prd)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
                prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
    return pszBuf;
}


/** Formats a 32-bit float as C initializer source code (rotating buffer). */
static const char *GenFormatR32(PCRTFLOAT32U pr)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
    return pszBuf;
}


/** Formats a packed BCD value as C initializer source code, listing the
 *  digit pairs from most to least significant (rotating buffer). */
static const char *GenFormatD80(PCRTPBCD80U pd80)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off;
    if (pd80->s.uPad == 0)
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
    else
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
                           RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
                           RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
    pszBuf[off++] = ')';
    pszBuf[off++] = '\0';
    return pszBuf;
}
934
935
936static const char *GenFormatI64(int64_t i64)
937{
938 if (i64 == INT64_MIN) /* This one is problematic */
939 return "INT64_MIN";
940 if (i64 == INT64_MAX)
941 return "INT64_MAX";
942 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
943 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
944 return pszBuf;
945}
946
# if 0 /* unused */
/** Pointer variant of GenFormatI64(), currently compiled out. */
static const char *GenFormatI64(int64_t const *pi64)
{
    return GenFormatI64(*pi64);
}
# endif
953
954static const char *GenFormatI32(int32_t i32)
955{
956 if (i32 == INT32_MIN) /* This one is problematic */
957 return "INT32_MIN";
958 if (i32 == INT32_MAX)
959 return "INT32_MAX";
960 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
961 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
962 return pszBuf;
963}
964
965
966const char *GenFormatI32(int32_t const *pi32)
967{
968 return GenFormatI32(*pi32);
969}
970
971
972const char *GenFormatI16(int16_t i16)
973{
974 if (i16 == INT16_MIN) /* This one is problematic */
975 return "INT16_MIN";
976 if (i16 == INT16_MAX)
977 return "INT16_MAX";
978 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
979 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
980 return pszBuf;
981}
982
983
984const char *GenFormatI16(int16_t const *pi16)
985{
986 return GenFormatI16(*pi16);
987}
988
989
/**
 * Writes the standard file header (license, includes) to a generated test
 * data source file, tagging it with the SVN revision, CPU description and
 * optional CPU type.
 */
static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
{
    /* We want to tag the generated source code with the revision that produced it. */
    static char s_szRev[] = "$Revision: 104269 $";
    const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
    size_t cchRev = 0;
    while (RT_C_IS_DIGIT(pszRev[cchRev])) /* only the leading digits, not the trailing " $" */
        cchRev++;

    RTStrmPrintf(pOut,
                 "/* $Id: tstIEMAImpl.cpp 104269 2024-04-10 09:42:20Z vboxsync $ */\n"
                 "/** @file\n"
                 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
                 " */\n"
                 "\n"
                 "/*\n"
                 " * Copyright (C) 2022-" VBOX_C_YEAR " Oracle and/or its affiliates.\n"
                 " *\n"
                 " * This file is part of VirtualBox base platform packages, as\n"
                 " * available from https://www.virtualbox.org.\n"
                 " *\n"
                 " * This program is free software; you can redistribute it and/or\n"
                 " * modify it under the terms of the GNU General Public License\n"
                 " * as published by the Free Software Foundation, in version 3 of the\n"
                 " * License.\n"
                 " *\n"
                 " * This program is distributed in the hope that it will be useful, but\n"
                 " * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
                 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
                 " * General Public License for more details.\n"
                 " *\n"
                 " * You should have received a copy of the GNU General Public License\n"
                 " * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
                 " *\n"
                 " * SPDX-License-Identifier: GPL-3.0-only\n"
                 " */\n"
                 "\n"
                 "#include \"tstIEMAImpl.h\"\n"
                 "\n"
                 ,
                 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
}
1032
1033
1034static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
1035{
1036 PRTSTREAM pOut = NULL;
1037 int rc = RTStrmOpen(pszFilename, "w", &pOut);
1038 if (RT_SUCCESS(rc))
1039 {
1040 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
1041 return pOut;
1042 }
1043 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
1044 return NULL;
1045}
1046
1047
1048static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
1049{
1050 RTStrmPrintf(pOut,
1051 "\n"
1052 "/* end of file */\n");
1053 int rc = RTStrmClose(pOut);
1054 if (RT_SUCCESS(rc))
1055 return rcExit;
1056 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
1057}
1058
1059
/** Emits the opening of a g_aTests_<name> array definition of @a pszType. */
static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
{
    RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
}
1064
1065
/** Closes a g_aTests_<name> array definition and emits the matching
 *  g_cTests_<name> element count. */
static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
{
    RTStrmPrintf(pOut,
                 "};\n"
                 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
                 "\n",
                 pszName, pszName);
}
1074
1075# endif /* unused */
1076
1077static void GenerateBinaryWrite(PIEMBINARYOUTPUT pBinOut, const void *pvData, size_t cbData)
1078{
1079 pBinOut->cbWritten += cbData; /* ignore errors - makes entry calculation simpler */
1080 if (RT_SUCCESS_NP(pBinOut->rcWrite))
1081 {
1082 pBinOut->rcWrite = RTVfsIoStrmWrite(pBinOut->hVfsUncompressed, pvData, cbData, true /*fBlocking*/, NULL);
1083 if (RT_SUCCESS(pBinOut->rcWrite))
1084 return;
1085 RTMsgError("Error writing '%s': %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
1086 }
1087}
1088
/**
 * Opens a gzip-compressed binary test data output file.
 *
 * When @a pszFilenameFmt is NULL the output runs in null mode and all data is
 * discarded.  A binary header is written when generating fresh data
 * (@a pInfoToPreserve is NULL) or when the preserved info carries both a
 * revision and CPU description.
 *
 * @returns true on success (incl. null mode), false on failure (error shown).
 * @param   pBinOut             The output state to initialize.
 * @param   pszFilenameFmt      Filename format string taking @a pszName as the
 *                              only argument, NULL for null mode.
 * @param   pszName             The subtest name.
 * @param   pInfoToPreserve     Header info from previously loaded data to
 *                              carry over, NULL when generating fresh data.
 * @param   cbEntry             The size of a single test entry in bytes.
 */
static bool GenerateBinaryOpen(PIEMBINARYOUTPUT pBinOut, const char *pszFilenameFmt, const char *pszName,
                               IEMTESTENTRYINFO const *pInfoToPreserve, uint32_t cbEntry)
{
    pBinOut->cbEntry          = cbEntry;
    pBinOut->cbWritten        = 0;
    pBinOut->hVfsFile         = NIL_RTVFSFILE;
    pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
    if (pszFilenameFmt)
    {
        pBinOut->fNull = false;
        if (RTStrPrintf2(pBinOut->szFilename, sizeof(pBinOut->szFilename), pszFilenameFmt, pszName) > 0)
        {
            RTMsgInfo("GenerateBinaryOpen: %s...\n", pBinOut->szFilename);
            pBinOut->rcWrite = RTVfsFileOpenNormal(pBinOut->szFilename,
                                                   RTFILE_O_CREATE_REPLACE | RTFILE_O_WRITE | RTFILE_O_DENY_READWRITE,
                                                   &pBinOut->hVfsFile);
            if (RT_SUCCESS(pBinOut->rcWrite))
            {
                RTVFSIOSTREAM hVfsIoFile = RTVfsFileToIoStream(pBinOut->hVfsFile);
                if (hVfsIoFile != NIL_RTVFSIOSTREAM)
                {
                    /* Writes to hVfsUncompressed are gzipped (level 9) into the file. */
                    pBinOut->rcWrite = RTZipGzipCompressIoStream(hVfsIoFile, 0 /*fFlags*/, 9, &pBinOut->hVfsUncompressed);
                    RTVfsIoStrmRelease(hVfsIoFile);
                    if (RT_SUCCESS(pBinOut->rcWrite))
                    {
                        pBinOut->rcWrite      = VINF_SUCCESS;
                        pBinOut->fWroteHeader = false;

                        /* Write the header if applicable. */
                        if (   !pInfoToPreserve
                            || (pInfoToPreserve->uSvnRev != 0 && *pInfoToPreserve->pszCpuDesc))
                        {
                            IEMBINARYHEADER Hdr;
                            RT_ZERO(Hdr);
                            memcpy(Hdr.szMagic, IEMBINARYHEADER_MAGIC, sizeof(IEMBINARYHEADER_MAGIC));
                            Hdr.cbEntry = cbEntry;
                            Hdr.uSvnRev = pInfoToPreserve ? pInfoToPreserve->uSvnRev : g_uSvnRev;
                            RTStrCopy(Hdr.szCpuDesc, sizeof(Hdr.szCpuDesc),
                                      pInfoToPreserve ? pInfoToPreserve->pszCpuDesc : g_szCpuDesc);
                            GenerateBinaryWrite(pBinOut, &Hdr, sizeof(Hdr));
                            pBinOut->fWroteHeader = true;
                        }

                        return true;
                    }

                    RTMsgError("RTZipGzipCompressIoStream: %Rrc", pBinOut->rcWrite);
                }
                else
                {
                    RTMsgError("RTVfsFileToIoStream failed!");
                    pBinOut->rcWrite = VERR_VFS_CHAIN_CAST_FAILED;
                }
                /* Failure: drop the partially created file again. */
                RTVfsFileRelease(pBinOut->hVfsFile);
                RTFileDelete(pBinOut->szFilename);
            }
            else
                RTMsgError("Failed to open '%s' for writing: %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
        }
        else
        {
            RTMsgError("filename too long: %s + %s", pszFilenameFmt, pszName);
            pBinOut->rcWrite = VERR_BUFFER_OVERFLOW;
        }
        return false;
    }
    /* Null mode: pretend success but discard everything. */
    RTMsgInfo("GenerateBinaryOpen: %s -> /dev/null\n", pszName);
    pBinOut->rcWrite       = VERR_IGNORED;
    pBinOut->fNull         = true;
    pBinOut->fWroteHeader  = false;
    pBinOut->szFilename[0] = '\0';
    return true;
}
1162
/** Convenience wrapper around GenerateBinaryOpen() for a subtest table entry,
 *  picking the filename format by EFLAGS flavour (no header info preserved). */
# define GENERATE_BINARY_OPEN(a_pBinOut, a_papszNameFmts, a_Entry) \
    GenerateBinaryOpen((a_pBinOut), a_papszNameFmts[(a_Entry).idxCpuEflFlavour], (a_Entry).pszName, \
                       NULL /*pInfo*/, sizeof((a_Entry).paTests[0]))
1166
/**
 * Writes the footer (when a header was written), flushes and closes the
 * binary output.
 *
 * @returns true if all writes, the flushes and the close succeeded.
 */
static bool GenerateBinaryClose(PIEMBINARYOUTPUT pBinOut)
{
    if (!pBinOut->fNull)
    {
        /* Write footer if we've written a header. */
        if (pBinOut->fWroteHeader)
        {
            IEMBINARYFOOTER Ftr;
            RT_ZERO(Ftr);
            memcpy(Ftr.szMagic, IEMBINARYFOOTER_MAGIC, sizeof(IEMBINARYFOOTER_MAGIC));
            Ftr.cbEntry  = pBinOut->cbEntry;
            /* cbWritten includes the header, so subtract it for the entry count. */
            Ftr.cEntries = (uint32_t)((pBinOut->cbWritten - sizeof(IEMBINARYHEADER)) / pBinOut->cbEntry);
            Assert(Ftr.cEntries * pBinOut->cbEntry + sizeof(IEMBINARYHEADER) == pBinOut->cbWritten);
            GenerateBinaryWrite(pBinOut, &Ftr, sizeof(Ftr));
        }

        /* This is rather jovial about rcWrite. */
        int const rc1 = RTVfsIoStrmFlush(pBinOut->hVfsUncompressed);
        RTVfsIoStrmRelease(pBinOut->hVfsUncompressed);
        pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
        if (RT_FAILURE(rc1))
            RTMsgError("Error flushing '%s' (uncompressed stream): %Rrc", pBinOut->szFilename, rc1);

        int const rc2 = RTVfsFileFlush(pBinOut->hVfsFile);
        RTVfsFileRelease(pBinOut->hVfsFile);
        pBinOut->hVfsFile = NIL_RTVFSFILE;
        if (RT_FAILURE(rc2))
            RTMsgError("Error flushing '%s' (compressed file): %Rrc", pBinOut->szFilename, rc2);

        return RT_SUCCESS(rc2) && RT_SUCCESS(rc1) && RT_SUCCESS(pBinOut->rcWrite);
    }
    return true;
}
1200
/** Helper for DumpAll: emits a <BaseName>DumpAll() function that decompresses
 *  each subtest's data and re-dumps it as a compressed binary file, preserving
 *  the original header info.
 *
 *  Note that GenerateBinaryWrite() takes a byte count, so the test count must
 *  be scaled by the entry size here. */
# define DUMP_ALL_FN(a_FnBaseName, a_aSubTests) \
    static RTEXITCODE a_FnBaseName ## DumpAll(const char * const * papszNameFmts) \
    { \
        for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
        { \
            AssertReturn(DECOMPRESS_TESTS(a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
            IEMBINARYOUTPUT BinOut; \
            AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[a_aSubTests[iFn].idxCpuEflFlavour], \
                                            a_aSubTests[iFn].pszName, &a_aSubTests[iFn].Info, \
                                            sizeof(a_aSubTests[iFn].paTests[0])), \
                         RTEXITCODE_FAILURE); \
            GenerateBinaryWrite(&BinOut, a_aSubTests[iFn].paTests, \
                                a_aSubTests[iFn].cTests * sizeof(a_aSubTests[iFn].paTests[0])); \
            FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
            AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
        } \
        return RTEXITCODE_SUCCESS; \
    }
1219#endif /* TSTIEMAIMPL_WITH_GENERATOR */
1220
1221
1222/*
1223 * Test helpers.
1224 */
1225static bool IsTestEnabled(const char *pszName)
1226{
1227 /* Process excludes first: */
1228 uint32_t i = g_cExcludeTestPatterns;
1229 while (i-- > 0)
1230 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1231 return false;
1232
1233 /* If no include patterns, everything is included: */
1234 i = g_cIncludeTestPatterns;
1235 if (!i)
1236 return true;
1237
1238 /* Otherwise only tests in the include patters gets tested: */
1239 while (i-- > 0)
1240 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1241 return true;
1242
1243 return false;
1244}
1245
1246
1247static bool SubTestAndCheckIfEnabled(const char *pszName)
1248{
1249 bool const fEnabled = IsTestEnabled(pszName);
1250 if (g_fVerboseSkipping || fEnabled)
1251 {
1252 RTTestSub(g_hTest, pszName);
1253 if (fEnabled)
1254 return true;
1255 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
1256 }
1257 return false;
1258}
1259
1260
/**
 * Decompresses (gzip) embedded test data before use, caching the result.
 *
 * @returns IPRT status code (failures also flagged via RTTestIFailed).
 * @param   pvCompressed    The compressed test data blob.
 * @param   cbCompressed    Size of the blob in bytes.
 * @param   cbEntry         Size of a single test entry, for validation.
 * @param   pszWhat         The test name (for error messages).
 * @param   ppvTests        Where to return the address of the first test entry.
 * @param   pcTests         Where to return the number of test entries.
 * @param   pInfo           Where to return buffer ownership and, when a header
 *                          is present, the CPU description and SVN revision.
 */
static int DecompressBinaryTest(void const *pvCompressed, uint32_t cbCompressed, size_t cbEntry, const char *pszWhat,
                                void **ppvTests, uint32_t *pcTests, IEMTESTENTRYINFO *pInfo)
{
    /* Don't do it again. */
    if (pInfo->pvUncompressed && *ppvTests)
        return VINF_SUCCESS;

    /* Open a memory stream for the compressed binary data. */
    RTVFSIOSTREAM hVfsIos = NIL_RTVFSIOSTREAM;
    int rc = RTVfsIoStrmFromBuffer(RTFILE_O_READ, pvCompressed, cbCompressed, &hVfsIos);
    RTTESTI_CHECK_RC_OK_RET(rc, rc);

    /* Open a decompressed stream for it. */
    RTVFSIOSTREAM hVfsIosDecomp = NIL_RTVFSIOSTREAM;
    rc = RTZipGzipDecompressIoStream(hVfsIos, RTZIPGZIPDECOMP_F_ALLOW_ZLIB_HDR, &hVfsIosDecomp);
    RTTESTI_CHECK_RC_OK(rc);
    if (RT_SUCCESS(rc))
    {
        /* Initial output buffer allocation (guessing at the compression ratio). */
        size_t cbDecompressedAlloc = cbCompressed <= _16M ? (size_t)cbCompressed * 16 : (size_t)cbCompressed * 4;
        uint8_t *pbDecompressed = (uint8_t *)RTMemAllocZ(cbDecompressedAlloc);
        if (pbDecompressed)
        {
            /* Read the whole stream, growing the buffer as needed. */
            size_t off = 0;
            for (;;)
            {
                size_t cbRead = 0;
                rc = RTVfsIoStrmRead(hVfsIosDecomp, &pbDecompressed[off], cbDecompressedAlloc - off, true /*fBlocking*/, &cbRead);
                if (RT_FAILURE(rc))
                    break;
                if (rc == VINF_EOF && cbRead == 0)
                    break;
                off += cbRead;

                /* Keep at least 256 bytes of headroom for the next read. */
                if (cbDecompressedAlloc < off + 256)
                {
                    size_t const cbNew = cbDecompressedAlloc < _128M ? cbDecompressedAlloc * 2 : cbDecompressedAlloc + _32M;
                    void * const pvNew = RTMemRealloc(pbDecompressed, cbNew);
                    AssertBreakStmt(pvNew, rc = VERR_NO_MEMORY);
                    cbDecompressedAlloc = cbNew;
                    pbDecompressed = (uint8_t *)pvNew;
                }
            }
            if (RT_SUCCESS(rc))
            {
                size_t const cbUncompressed = off;

                /* Validate the header and footer if present and subtract them from 'off'. */
                IEMBINARYHEADER const *pHdr = NULL;
                if (   off >= sizeof(IEMTESTENTRYINFO) /* NOTE(review): presumably meant sizeof(IEMBINARYHEADER) + sizeof(IEMBINARYFOOTER);
                                                          a stream shorter than that could underflow 'off' below - confirm. */
                    && memcmp(pbDecompressed, IEMBINARYHEADER_MAGIC, sizeof(IEMBINARYHEADER_MAGIC)) == 0)
                {
                    pHdr = (IEMBINARYHEADER const *)pbDecompressed;
                    IEMBINARYFOOTER const *pFtr = (IEMBINARYFOOTER const *)&pbDecompressed[off - sizeof(IEMBINARYFOOTER)];

                    off -= sizeof(*pHdr) + sizeof(*pFtr); /* 'off' is now the payload size only */
                    rc = VERR_IO_BAD_UNIT;
                    if (pHdr->cbEntry != cbEntry)
                        RTTestIFailed("Test entry size differs for '%s': %#x (header r%u), expected %#zx (uncompressed size %#zx)",
                                      pszWhat, pHdr->cbEntry, pHdr->uSvnRev, cbEntry, off + sizeof(*pHdr) + sizeof(*pFtr));
                    else if (memcmp(pFtr->szMagic, IEMBINARYFOOTER_MAGIC, sizeof(IEMBINARYFOOTER_MAGIC)) != 0)
                        RTTestIFailed("Wrong footer magic for '%s': %.*Rhxs\n", pszWhat, sizeof(pFtr->szMagic), pFtr->szMagic);
                    else if (pFtr->cbEntry != cbEntry)
                        RTTestIFailed("Wrong footer entry size for '%s': %#x, expected %#x\n", pszWhat, pFtr->cbEntry, cbEntry);
                    else if (pFtr->cEntries != off / cbEntry)
                        RTTestIFailed("Wrong footer entry count for '%s': %#x, expected %#x\n",
                                      pszWhat, pFtr->cEntries, off / cbEntry);
                    else
                        rc = VINF_SUCCESS;
                }

                /* Validate the decompressed size wrt entry size. */
                if ((off % cbEntry) != 0 && RT_SUCCESS(rc))
                {
                    RTTestIFailed("Uneven decompressed data size for '%s': %#zx vs entry size %#zx -> %#zx",
                                  pszWhat, off, cbEntry, off % cbEntry);
                    rc = VERR_IO_BAD_LENGTH;
                }

                if (RT_SUCCESS(rc))
                {
                    /*
                     * We're good.
                     */
                    /* Reallocate the block if it's way too big. */
                    if (cbDecompressedAlloc - cbUncompressed > _512K)
                    {
                        void * const pvNew = RTMemRealloc(pbDecompressed, cbUncompressed);
                        if (pvNew)
                        {
                            pbDecompressed = (uint8_t *)pvNew;
                            if (pHdr) /* the buffer may have moved */
                                pHdr = (IEMBINARYHEADER const *)pbDecompressed;
                        }
                    }
                    RTMEM_MAY_LEAK(pbDecompressed);

                    /* Fill in the info and other return values. */
                    pInfo->cbUncompressed = (uint32_t)cbUncompressed;
                    pInfo->pvUncompressed = pbDecompressed;
                    pInfo->pszCpuDesc = pHdr ? pHdr->szCpuDesc : NULL;
                    pInfo->uSvnRev = pHdr ? pHdr->uSvnRev : 0;
                    *pcTests = (uint32_t)(off / cbEntry);
                    *ppvTests = pHdr ? (uint8_t *)(pHdr + 1) : pbDecompressed;

                    pbDecompressed = NULL; /* ownership transferred to pInfo */
                    rc = VINF_SUCCESS;
                }
            }
            else
                RTTestIFailed("Failed to decompress binary stream '%s': %Rrc (off=%#zx, cbCompressed=%#x)",
                              pszWhat, rc, off, cbCompressed);
            RTMemFree(pbDecompressed);
        }
        else
        {
            RTTestIFailed("Out of memory decompressing test data '%s'", pszWhat);
            rc = VERR_NO_MEMORY;
        }
        RTVfsIoStrmRelease(hVfsIosDecomp);
    }
    RTVfsIoStrmRelease(hVfsIos);
    return rc;
}
1386
/** Convenience wrapper around DecompressBinaryTest() for a subtest table
 *  entry, evaluating to true on success. */
#define DECOMPRESS_TESTS(a_Entry) \
    RT_SUCCESS(DecompressBinaryTest((a_Entry).pvCompressedTests, *(a_Entry).pcbCompressedTests, \
                                    sizeof((a_Entry).paTests[0]), (a_Entry).pszName, \
                                    (void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info))
1391
1392/** Frees the decompressed test data. */
1393static void FreeDecompressedTests(void **ppvTests, uint32_t *pcTests, IEMTESTENTRYINFO *pInfo)
1394{
1395 RTMemFree(pInfo->pvUncompressed);
1396 pInfo->pvUncompressed = NULL;
1397 pInfo->cbUncompressed = 0;
1398 *ppvTests = NULL;
1399 *pcTests = 0;
1400}
1401
/** Convenience wrapper around FreeDecompressedTests() for a subtest table entry. */
#define FREE_DECOMPRESSED_TESTS(a_Entry) \
    FreeDecompressedTests((void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info)
1404
1405
1406/** Check if the test is enabled and decompresses test data. */
1407static int SubTestAndCheckIfEnabledAndDecompress(const char *pszName, void const *pvCompressed, uint32_t cbCompressed,
1408 size_t cbEntry, void **ppvTests, uint32_t *pcTests, IEMTESTENTRYINFO *pInfo)
1409{
1410 if (SubTestAndCheckIfEnabled(pszName))
1411 {
1412 int const rc = DecompressBinaryTest(pvCompressed, cbCompressed, cbEntry, pszName, ppvTests, pcTests, pInfo);
1413 if (RT_SUCCESS(rc))
1414 return true;
1415 }
1416 return false;
1417}
1418
/** Convenience wrapper around SubTestAndCheckIfEnabledAndDecompress() for a
 *  subtest table entry. */
#define SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_Entry) \
    SubTestAndCheckIfEnabledAndDecompress((a_Entry).pszName, (a_Entry).pvCompressedTests, *(a_Entry).pcbCompressedTests, \
                                          sizeof((a_Entry).paTests[0]), \
                                          (void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info)
1423
1424
1425static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
1426{
1427 if (fActual == fExpected)
1428 return "";
1429
1430 uint32_t const fXor = fActual ^ fExpected;
1431 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1432 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1433
1434 static struct
1435 {
1436 const char *pszName;
1437 uint32_t fFlag;
1438 } const s_aFlags[] =
1439 {
1440#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
1441 EFL_ENTRY(CF),
1442 EFL_ENTRY(PF),
1443 EFL_ENTRY(AF),
1444 EFL_ENTRY(ZF),
1445 EFL_ENTRY(SF),
1446 EFL_ENTRY(TF),
1447 EFL_ENTRY(IF),
1448 EFL_ENTRY(DF),
1449 EFL_ENTRY(OF),
1450 EFL_ENTRY(IOPL),
1451 EFL_ENTRY(NT),
1452 EFL_ENTRY(RF),
1453 EFL_ENTRY(VM),
1454 EFL_ENTRY(AC),
1455 EFL_ENTRY(VIF),
1456 EFL_ENTRY(VIP),
1457 EFL_ENTRY(ID),
1458 };
1459 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1460 if (s_aFlags[i].fFlag & fXor)
1461 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1462 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1463 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1464 return pszBuf;
1465}
1466
1467
1468static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
1469{
1470 if (fActual == fExpected)
1471 return "";
1472
1473 uint16_t const fXor = fActual ^ fExpected;
1474 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1475 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1476
1477 static struct
1478 {
1479 const char *pszName;
1480 uint32_t fFlag;
1481 } const s_aFlags[] =
1482 {
1483#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1484 FSW_ENTRY(IE),
1485 FSW_ENTRY(DE),
1486 FSW_ENTRY(ZE),
1487 FSW_ENTRY(OE),
1488 FSW_ENTRY(UE),
1489 FSW_ENTRY(PE),
1490 FSW_ENTRY(SF),
1491 FSW_ENTRY(ES),
1492 FSW_ENTRY(C0),
1493 FSW_ENTRY(C1),
1494 FSW_ENTRY(C2),
1495 FSW_ENTRY(C3),
1496 FSW_ENTRY(B),
1497 };
1498 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1499 if (s_aFlags[i].fFlag & fXor)
1500 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1501 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1502 if (fXor & X86_FSW_TOP_MASK)
1503 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1504 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1505#if 0 /* For debugging fprem & fprem1 */
1506 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1507 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1508#endif
1509 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1510 return pszBuf;
1511}
1512
1513
1514static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1515{
1516 if (fActual == fExpected)
1517 return "";
1518
1519 uint16_t const fXor = fActual ^ fExpected;
1520 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1521 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1522
1523 static struct
1524 {
1525 const char *pszName;
1526 uint32_t fFlag;
1527 } const s_aFlags[] =
1528 {
1529#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1530 MXCSR_ENTRY(IE),
1531 MXCSR_ENTRY(DE),
1532 MXCSR_ENTRY(ZE),
1533 MXCSR_ENTRY(OE),
1534 MXCSR_ENTRY(UE),
1535 MXCSR_ENTRY(PE),
1536
1537 MXCSR_ENTRY(IM),
1538 MXCSR_ENTRY(DM),
1539 MXCSR_ENTRY(ZM),
1540 MXCSR_ENTRY(OM),
1541 MXCSR_ENTRY(UM),
1542 MXCSR_ENTRY(PM),
1543
1544 MXCSR_ENTRY(DAZ),
1545 MXCSR_ENTRY(FZ),
1546#undef MXCSR_ENTRY
1547 };
1548 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1549 if (s_aFlags[i].fFlag & fXor)
1550 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1551 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1552 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1553 return pszBuf;
1554}
1555
1556
1557static const char *FormatFcw(uint16_t fFcw)
1558{
1559 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1560
1561 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1562 switch (fFcw & X86_FCW_PC_MASK)
1563 {
1564 case X86_FCW_PC_24: pszPC = "PC24"; break;
1565 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1566 case X86_FCW_PC_53: pszPC = "PC53"; break;
1567 case X86_FCW_PC_64: pszPC = "PC64"; break;
1568 }
1569
1570 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1571 switch (fFcw & X86_FCW_RC_MASK)
1572 {
1573 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1574 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1575 case X86_FCW_RC_UP: pszRC = "UP"; break;
1576 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1577 }
1578 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1579
1580 static struct
1581 {
1582 const char *pszName;
1583 uint32_t fFlag;
1584 } const s_aFlags[] =
1585 {
1586#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1587 FCW_ENTRY(IM),
1588 FCW_ENTRY(DM),
1589 FCW_ENTRY(ZM),
1590 FCW_ENTRY(OM),
1591 FCW_ENTRY(UM),
1592 FCW_ENTRY(PM),
1593 { "6M", 64 },
1594 };
1595 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1596 if (fFcw & s_aFlags[i].fFlag)
1597 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1598
1599 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1600 return pszBuf;
1601}
1602
1603
1604static const char *FormatMxcsr(uint32_t fMxcsr)
1605{
1606 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1607
1608 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1609 switch (fMxcsr & X86_MXCSR_RC_MASK)
1610 {
1611 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1612 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1613 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1614 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1615 }
1616
1617 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1618 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1619 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1620
1621 static struct
1622 {
1623 const char *pszName;
1624 uint32_t fFlag;
1625 } const s_aFlags[] =
1626 {
1627#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1628 MXCSR_ENTRY(IE),
1629 MXCSR_ENTRY(DE),
1630 MXCSR_ENTRY(ZE),
1631 MXCSR_ENTRY(OE),
1632 MXCSR_ENTRY(UE),
1633 MXCSR_ENTRY(PE),
1634
1635 MXCSR_ENTRY(IM),
1636 MXCSR_ENTRY(DM),
1637 MXCSR_ENTRY(ZM),
1638 MXCSR_ENTRY(OM),
1639 MXCSR_ENTRY(UM),
1640 MXCSR_ENTRY(PM),
1641 { "6M", 64 },
1642 };
1643 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1644 if (fMxcsr & s_aFlags[i].fFlag)
1645 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1646
1647 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1648 return pszBuf;
1649}
1650
1651
1652static const char *FormatR80(PCRTFLOAT80U pr80)
1653{
1654 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1655 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1656 return pszBuf;
1657}
1658
1659
1660static const char *FormatR64(PCRTFLOAT64U pr64)
1661{
1662 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1663 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1664 return pszBuf;
1665}
1666
1667
1668static const char *FormatR32(PCRTFLOAT32U pr32)
1669{
1670 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1671 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1672 return pszBuf;
1673}
1674
1675
1676static const char *FormatD80(PCRTPBCD80U pd80)
1677{
1678 /* There is only one indefinite endcoding (same as for 80-bit
1679 floating point), so get it out of the way first: */
1680 if (RTPBCD80U_IS_INDEFINITE(pd80))
1681 return "Ind";
1682
1683 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1684 size_t off = 0;
1685 pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1686 unsigned cBadDigits = 0;
1687 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1688 while (iPair-- > 0)
1689 {
1690 static const char s_szDigits[] = "0123456789abcdef";
1691 static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1692 pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1693 pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1694 cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1695 + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1696 }
1697 if (cBadDigits || pd80->s.uPad != 0)
1698 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1699 pszBuf[off] = '\0';
1700 return pszBuf;
1701}
1702
1703
#if 0
/** Formats a signed 64-bit value in hex, currently compiled out. */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
1712
1713
1714static const char *FormatI32(int32_t const *piVal)
1715{
1716 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1717 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1718 return pszBuf;
1719}
1720
1721
1722static const char *FormatI16(int16_t const *piVal)
1723{
1724 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1725 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1726 return pszBuf;
1727}
1728
1729
1730static const char *FormatU128(PCRTUINT128U puVal)
1731{
1732 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1733 RTStrFormatU128(pszBuf, sizeof(g_aszBuf[0]), puVal, 16, 0, 0, RTSTR_F_SPECIAL);
1734 return pszBuf;
1735}
1736
1737
1738/*
1739 * Binary operations.
1740 */
1741TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
1742TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
1743TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
1744TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1745
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits a BinU<N>Generate() function producing @a cTests random test entries
 *  (plus any fixed tests) per subtest via the native worker when available,
 *  and the matching BinU<N>DumpAll() via DUMP_ALL_FN.  Expands to nothing
 *  without TSTIEMAIMPL_WITH_GENERATOR. */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static RTEXITCODE BinU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        IEMBINARYOUTPUT BinOut; \
        if (   g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aBinU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn  = RandEFlags(); \
            Test.uDstIn  = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn  = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc   = 0; \
            Test.fEflOut = pfn(Test.fEflIn, &Test.uDstOut, Test.uSrcIn); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        for (uint32_t iTest = 0; iTest < g_aBinU ## a_cBits[iFn].cFixedTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn  = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags() \
                         : g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn; \
            Test.uDstIn  = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uDstIn; \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn  = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uSrcIn; \
            Test.uMisc   = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uMisc; \
            Test.fEflOut = pfn(Test.fEflIn, &Test.uDstOut, Test.uSrcIn); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(BinU ## a_cBits, g_aBinU ## a_cBits)

#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1793
1794
1795/** Based on a quick probe run, guess how long to run the benchmark. */
1796static uint32_t EstimateIterations(uint32_t cProbeIterations, uint64_t cNsProbe)
1797{
1798 uint64_t cPicoSecPerIteration = cNsProbe * 1000 / cProbeIterations;
1799 uint64_t cIterations = g_cPicoSecBenchmark / cPicoSecPerIteration;
1800 if (cIterations > _2G)
1801 return _2G;
1802 if (cIterations < _4K)
1803 return _4K;
1804 return RT_ALIGN_32((uint32_t)cIterations, _4K);
1805}
1806
1807
/** Emits a BinU<N>Bench() helper (4x unrolled timing loop) and a BinU<N>Test()
 *  function that verifies every test entry against each implementation
 *  variation (C fallback, then native) and optionally benchmarks it.  Also
 *  pulls in the generator code via GEN_BINARY_TESTS. */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static uint64_t BinU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLBINU ## a_cBits pfn, a_TestType const *pEntry) \
{ \
    uint32_t const fEflIn = pEntry->fEflIn; \
    a_uType const  uDstIn = pEntry->uDstIn; \
    a_uType const  uSrcIn = pEntry->uSrcIn; \
    cIterations /= 4; /* loop body is unrolled 4 times */ \
    RTThreadYield(); \
    uint64_t const nsStart = RTTimeNanoTS(); \
    for (uint32_t i = 0; i < cIterations; i++) \
    { \
        a_uType uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
 \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
 \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
 \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, uSrcIn); \
    } \
    return RTTimeNanoTS() - nsStart; \
} \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) { RTTestSkipped(g_hTest, "no tests"); continue; } \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                a_uType uDst = paTests[iTest].uDstIn; \
                uint32_t fEfl = pfn(paTests[iTest].fEflIn, &uDst, paTests[iTest].uSrcIn); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    /* Re-run through the special test variable (g_puNN). */ \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    fEfl = pfn(paTests[iTest].fEflIn, g_pu ## a_cBits, paTests[iTest].uSrcIn); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, fEfl == paTests[iTest].fEflOut); \
                } \
            } \
 \
            /* Benchmark if all succeeded. */ \
            if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
            { \
                uint32_t const iTest = cTests / 2; \
                uint32_t const cIterations = EstimateIterations(_64K, BinU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
                uint64_t const cNsRealRun = BinU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
                             "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
            } \
 \
            /* Next variation is native. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
1885
1886
1887/*
1888 * 8-bit binary operations.
1889 */
/** Subtest table for the 8-bit binary ALU operations (dst = dst op src,
 *  eflags in/out).  Each ENTRY_BIN wires the iemAImpl_* worker to its
 *  compressed test data; cmp/test are cast to the common PFNIEMAIMPLBINU8
 *  function pointer type via ENTRY_BIN_PFN_CAST. */
static BINU8_T g_aBinU8[] =
{
    ENTRY_BIN(add_u8),
    ENTRY_BIN(add_u8_locked),
    ENTRY_BIN(adc_u8),
    ENTRY_BIN(adc_u8_locked),
    ENTRY_BIN(sub_u8),
    ENTRY_BIN(sub_u8_locked),
    ENTRY_BIN(sbb_u8),
    ENTRY_BIN(sbb_u8_locked),
    ENTRY_BIN(or_u8),
    ENTRY_BIN(or_u8_locked),
    ENTRY_BIN(xor_u8),
    ENTRY_BIN(xor_u8_locked),
    ENTRY_BIN(and_u8),
    ENTRY_BIN(and_u8_locked),
    ENTRY_BIN_PFN_CAST(cmp_u8, PFNIEMAIMPLBINU8),
    ENTRY_BIN_PFN_CAST(test_u8, PFNIEMAIMPLBINU8),
};
/* Instantiates BinU8Bench() and BinU8Test() from the TEST_BINARY_OPS macro. */
TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1910
1911
1912/*
1913 * 16-bit binary operations.
1914 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed (non-random) test input for add_u16, mixed into the generated data
 *  via ENTRY_BIN_FIX: 1 + 0xffff with all eflags set on input, covering the
 *  full wrap-around/carry case.
 *  NOTE(review): the output columns (efl out, uDstOut) look like placeholders
 *  that the generator recalculates - confirm against ENTRY_BIN_FIX. */
static const BINU16_TEST_T g_aFixedTests_add_u16[] =
{
    /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
    { UINT32_MAX, 0, 1, 0, UINT16_MAX, 0 },
};
#endif
/** Subtest table for the 16-bit binary operations: the ALU ops, the bit
 *  test/modify ops (bt/btc/btr/bts - ENTRY_BIN_EX passes an extra value of 1,
 *  meaning defined by the macro), bit scans and imul_two in AMD and Intel
 *  eflags variants (the listed flags are where the vendors may differ), and
 *  arpl.  add_u16 uses ENTRY_BIN_FIX to include g_aFixedTests_add_u16. */
static BINU16_T g_aBinU16[] =
{
    ENTRY_BIN_FIX(add_u16),
    ENTRY_BIN(add_u16_locked),
    ENTRY_BIN(adc_u16),
    ENTRY_BIN(adc_u16_locked),
    ENTRY_BIN(sub_u16),
    ENTRY_BIN(sub_u16_locked),
    ENTRY_BIN(sbb_u16),
    ENTRY_BIN(sbb_u16_locked),
    ENTRY_BIN(or_u16),
    ENTRY_BIN(or_u16_locked),
    ENTRY_BIN(xor_u16),
    ENTRY_BIN(xor_u16_locked),
    ENTRY_BIN(and_u16),
    ENTRY_BIN(and_u16_locked),
    ENTRY_BIN_PFN_CAST(cmp_u16, PFNIEMAIMPLBINU16),
    ENTRY_BIN_PFN_CAST(test_u16, PFNIEMAIMPLBINU16),
    ENTRY_BIN_PFN_CAST_EX(bt_u16, PFNIEMAIMPLBINU16, 1),
    ENTRY_BIN_EX(btc_u16, 1),
    ENTRY_BIN_EX(btc_u16_locked, 1),
    ENTRY_BIN_EX(btr_u16, 1),
    ENTRY_BIN_EX(btr_u16_locked, 1),
    ENTRY_BIN_EX(bts_u16, 1),
    ENTRY_BIN_EX(bts_u16_locked, 1),
    ENTRY_BIN_AMD(  bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN(arpl),
};
/* Instantiates BinU16Bench() and BinU16Test() from the TEST_BINARY_OPS macro. */
TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1956
1957
1958/*
1959 * 32-bit binary operations.
1960 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed test input for add_u32 (see ENTRY_BIN_FIX): 1 + 0xffffffff with all
 *  eflags set on input, covering the full wrap-around/carry case.
 *  NOTE(review): output columns presumably recalculated by the generator. */
static const BINU32_TEST_T g_aFixedTests_add_u32[] =
{
    /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
    { UINT32_MAX, 0, 1, 0, UINT32_MAX, 0 },
};
#endif
/** Subtest table for the 32-bit binary operations.  Same layout as the 16-bit
 *  table (ALU ops, bit test/modify, bit scans and imul_two with AMD/Intel
 *  eflags variants), minus arpl, plus the adcx/adox extensions. */
static BINU32_T g_aBinU32[] =
{
    ENTRY_BIN_FIX(add_u32),
    ENTRY_BIN(add_u32_locked),
    ENTRY_BIN(adc_u32),
    ENTRY_BIN(adc_u32_locked),
    ENTRY_BIN(sub_u32),
    ENTRY_BIN(sub_u32_locked),
    ENTRY_BIN(sbb_u32),
    ENTRY_BIN(sbb_u32_locked),
    ENTRY_BIN(or_u32),
    ENTRY_BIN(or_u32_locked),
    ENTRY_BIN(xor_u32),
    ENTRY_BIN(xor_u32_locked),
    ENTRY_BIN(and_u32),
    ENTRY_BIN(and_u32_locked),
    ENTRY_BIN_PFN_CAST(cmp_u32, PFNIEMAIMPLBINU32),
    ENTRY_BIN_PFN_CAST(test_u32, PFNIEMAIMPLBINU32),
    ENTRY_BIN_PFN_CAST_EX(bt_u32, PFNIEMAIMPLBINU32, 1),
    ENTRY_BIN_EX(btc_u32, 1),
    ENTRY_BIN_EX(btc_u32_locked, 1),
    ENTRY_BIN_EX(btr_u32, 1),
    ENTRY_BIN_EX(btr_u32_locked, 1),
    ENTRY_BIN_EX(bts_u32, 1),
    ENTRY_BIN_EX(bts_u32_locked, 1),
    ENTRY_BIN_AMD(  bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN(adcx_u32),
    ENTRY_BIN(adox_u32),
};
/* Instantiates BinU32Bench() and BinU32Test() from the TEST_BINARY_OPS macro. */
TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
2003
2004
2005/*
2006 * 64-bit binary operations.
2007 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed test input for add_u64 (see ENTRY_BIN_FIX): 1 + 0xffff...ffff with
 *  all eflags set on input, covering the full wrap-around/carry case.
 *  NOTE(review): output columns presumably recalculated by the generator. */
static const BINU64_TEST_T g_aFixedTests_add_u64[] =
{
    /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
    { UINT32_MAX, 0, 1, 0, UINT64_MAX, 0 },
};
#endif
/** Subtest table for the 64-bit binary operations.  Same layout as the 32-bit
 *  table; popcnt/tzcnt/lzcnt are still outstanding (see @todo below). */
static BINU64_T g_aBinU64[] =
{
    ENTRY_BIN_FIX(add_u64),
    ENTRY_BIN(add_u64_locked),
    ENTRY_BIN(adc_u64),
    ENTRY_BIN(adc_u64_locked),
    ENTRY_BIN(sub_u64),
    ENTRY_BIN(sub_u64_locked),
    ENTRY_BIN(sbb_u64),
    ENTRY_BIN(sbb_u64_locked),
    ENTRY_BIN(or_u64),
    ENTRY_BIN(or_u64_locked),
    ENTRY_BIN(xor_u64),
    ENTRY_BIN(xor_u64_locked),
    ENTRY_BIN(and_u64),
    ENTRY_BIN(and_u64_locked),
    ENTRY_BIN_PFN_CAST(cmp_u64, PFNIEMAIMPLBINU64),
    ENTRY_BIN_PFN_CAST(test_u64, PFNIEMAIMPLBINU64),
    ENTRY_BIN_PFN_CAST_EX(bt_u64, PFNIEMAIMPLBINU64, 1),
    ENTRY_BIN_EX(btc_u64, 1),
    ENTRY_BIN_EX(btc_u64_locked, 1),
    ENTRY_BIN_EX(btr_u64, 1),
    ENTRY_BIN_EX(btr_u64_locked, 1),
    ENTRY_BIN_EX(bts_u64, 1),
    ENTRY_BIN_EX(bts_u64_locked, 1),
    ENTRY_BIN_AMD(  bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_BIN_AMD(  imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_BIN(adcx_u64),
    ENTRY_BIN(adox_u64),
/** @todo popcnt */
/** @todo tzcnt */
/** @todo lzcnt */
};
/* Instantiates BinU64Bench() and BinU64Test() from the TEST_BINARY_OPS macro. */
TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
2053
2054
2055/*
2056 * XCHG
2057 */
2058static void XchgTest(void)
2059{
2060 if (!SubTestAndCheckIfEnabled("xchg"))
2061 return;
2062 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
2063 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
2064 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
2065 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
2066
2067 static struct
2068 {
2069 uint8_t cb; uint64_t fMask;
2070 union
2071 {
2072 uintptr_t pfn;
2073 FNIEMAIMPLXCHGU8 *pfnU8;
2074 FNIEMAIMPLXCHGU16 *pfnU16;
2075 FNIEMAIMPLXCHGU32 *pfnU32;
2076 FNIEMAIMPLXCHGU64 *pfnU64;
2077 } u;
2078 }
2079 s_aXchgWorkers[] =
2080 {
2081 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
2082 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
2083 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
2084 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
2085 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
2086 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
2087 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
2088 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
2089 };
2090 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
2091 {
2092 RTUINT64U uIn1, uIn2, uMem, uDst;
2093 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
2094 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
2095 if (uIn1.u == uIn2.u)
2096 uDst.u = uIn2.u = ~uIn2.u;
2097
2098 switch (s_aXchgWorkers[i].cb)
2099 {
2100 case 1:
2101 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
2102 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
2103 break;
2104 case 2:
2105 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
2106 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
2107 break;
2108 case 4:
2109 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
2110 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
2111 break;
2112 case 8:
2113 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
2114 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
2115 break;
2116 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
2117 }
2118
2119 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
2120 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
2121 }
2122}
2123
2124
2125/*
2126 * XADD
2127 */
2128static void XaddTest(void)
2129{
2130#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
2131 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
2132 static struct \
2133 { \
2134 const char * const pszName; \
2135 FNIEMAIMPLXADDU ## a_cBits * const pfn; \
2136 void const * const pvCompressedTests; \
2137 uint32_t const * const pcbCompressedTests; \
2138 BINU ## a_cBits ## _TEST_T const *paTests; \
2139 uint32_t cTests; \
2140 IEMTESTENTRYINFO Info; \
2141 } s_aFuncs[] = \
2142 { \
2143 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
2144 g_abTests_add_u ## a_cBits, &g_cbTests_add_u ## a_cBits }, \
2145 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
2146 g_abTests_add_u ## a_cBits, &g_cbTests_add_u ## a_cBits }, \
2147 }; \
2148 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
2149 { \
2150 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(s_aFuncs[iFn])) continue; \
2151 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
2152 uint32_t const cTests = s_aFuncs[iFn].cTests; \
2153 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2154 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2155 { \
2156 uint32_t fEfl = paTests[iTest].fEflIn; \
2157 a_Type uSrc = paTests[iTest].uSrcIn; \
2158 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2159 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
2160 if ( fEfl != paTests[iTest].fEflOut \
2161 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
2162 || uSrc != paTests[iTest].uDstIn) \
2163 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2164 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
2165 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
2166 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2167 } \
2168 FREE_DECOMPRESSED_TESTS(s_aFuncs[iFn]); \
2169 } \
2170 } while(0)
2171 TEST_XADD(8, uint8_t, "%#04x");
2172 TEST_XADD(16, uint16_t, "%#06x");
2173 TEST_XADD(32, uint32_t, "%#010RX32");
2174 TEST_XADD(64, uint64_t, "%#010RX64");
2175}
2176
2177
2178/*
2179 * CMPXCHG
2180 */
2181
/**
 * Tests the cmpxchg_u8/16/32/64 (un)locked workers against the 'cmp' test
 * data.  Each record is run twice: a mismatch round (eflags must equal the
 * recorded cmp result and memory must be loaded into the compare operand)
 * and a match round (the sub worker computes the expected eflags, the new
 * value must be stored and the compare operand left with the old value).
 */
static void CmpXchgTest(void)
{
/* The struct layout must stay compatible with SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS. */
#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
    static struct \
    { \
        const char * const                      pszName; \
        FNIEMAIMPLCMPXCHGU ## a_cBits * const   pfn; \
        PFNIEMAIMPLBINU ## a_cBits const        pfnSub; \
        void const * const                      pvCompressedTests; \
        uint32_t const * const                  pcbCompressedTests; \
        BINU ## a_cBits ## _TEST_T const       *paTests; \
        uint32_t                                cTests; \
        IEMTESTENTRYINFO                        Info; \
    } s_aFuncs[] = \
    { \
        { "cmpxchg_u" # a_cBits,           iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
          g_abTests_cmp_u ## a_cBits, &g_cbTests_cmp_u ## a_cBits }, \
        { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
          g_abTests_cmp_u ## a_cBits, &g_cbTests_cmp_u ## a_cBits }, \
    }; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(s_aFuncs[iFn])) continue; \
        BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
        uint32_t const                           cTests  = s_aFuncs[iFn].cTests; \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            /* as is (99% likely to be negative). */ \
            uint32_t     fEfl    = paTests[iTest].fEflIn; \
            a_Type const uNew    = paTests[iTest].uSrcIn + 0x42; \
            a_Type       uA      = paTests[iTest].uDstIn; \
            *g_pu ## a_cBits     = paTests[iTest].uSrcIn; \
            a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
            s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
            if (   fEfl != paTests[iTest].fEflOut \
                || *g_pu ## a_cBits != uExpect \
                || uA != paTests[iTest].uSrcIn) \
                RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                             s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
                             uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
                             EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
            /* positive */ \
            uA = paTests[iTest].uDstIn; \
            uint32_t fEflExpect = s_aFuncs[iFn].pfnSub(paTests[iTest].fEflIn, &uA, uA); \
            fEfl = paTests[iTest].fEflIn; \
            uA   = paTests[iTest].uDstIn; \
            *g_pu ## a_cBits = uA; \
            s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
            if (   fEfl != fEflExpect \
                || *g_pu ## a_cBits != uNew \
                || uA != paTests[iTest].uDstIn) \
                RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                             s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
                             uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
                             EFlagsDiff(fEfl, fEflExpect)); \
        } \
        FREE_DECOMPRESSED_TESTS(s_aFuncs[iFn]); \
    } \
    } while(0)
    TEST_CMPXCHG(8, uint8_t, "%#04RX8");
    TEST_CMPXCHG(16, uint16_t, "%#06x");
    TEST_CMPXCHG(32, uint32_t, "%#010RX32");
#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
    TEST_CMPXCHG(64, uint64_t, "%#010RX64");
#endif
}
2250
2251
2252typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
2253
2254static uint64_t CmpXchg8bBench(uint32_t cIterations, FNIEMAIMPLCMPXCHG8B *pfn, uint64_t const uDstValue,
2255 uint64_t const uOldValue, uint64_t const uNewValue, uint32_t const fEflIn)
2256{
2257 cIterations /= 4;
2258 RTThreadYield();
2259 uint64_t const nsStart = RTTimeNanoTS();
2260 for (uint32_t i = 0; i < cIterations; i++)
2261 {
2262 RTUINT64U uA, uB;
2263 uint32_t fEfl = fEflIn;
2264 uint64_t uDst = uDstValue;
2265 uB.u = uNewValue;
2266 uA.u = uOldValue;
2267 pfn(&uDst, &uA, &uB, &fEfl);
2268
2269 fEfl = fEflIn;
2270 uDst = uDstValue;
2271 uB.u = uNewValue;
2272 uA.u = uOldValue;
2273 pfn(&uDst, &uA, &uB, &fEfl);
2274
2275 fEfl = fEflIn;
2276 uDst = uDstValue;
2277 uB.u = uNewValue;
2278 uA.u = uOldValue;
2279 pfn(&uDst, &uA, &uB, &fEfl);
2280
2281 fEfl = fEflIn;
2282 uDst = uDstValue;
2283 uB.u = uNewValue;
2284 uA.u = uOldValue;
2285 pfn(&uDst, &uA, &uB, &fEfl);
2286 }
2287 return RTTimeNanoTS() - nsStart;
2288}
2289
/**
 * Tests the cmpxchg8b and cmpxchg8b_locked workers.
 *
 * For each function two rounds are run with fresh random values: a positive
 * round (memory equals the compare value, so ZF must be set and the new value
 * stored) and a negative round (memory differs, so ZF must be cleared and the
 * memory value loaded into the compare operand).  When benchmarking is
 * enabled and no errors occurred, both outcomes are timed via CmpXchg8bBench.
 */
static void CmpXchg8bTest(void)
{
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG8B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg8b",        iemAImpl_cmpxchg8b },
        { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            uint64_t const uOldValue = RandU64();
            uint64_t const uNewValue = RandU64();

            /* positive test. */
            RTUINT64U uA, uB;
            uB.u = uNewValue;
            uA.u = uOldValue;
            *g_pu64 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl   = fEflIn;
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn | X86_EFL_ZF)
                || *g_pu64 != uNewValue
                || uA.u != uOldValue)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest, fEflIn, uOldValue, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue); /* the new-value operand must be left untouched */

            /* negative */
            uint64_t const uExpect = ~uOldValue;
            *g_pu64 = uExpect;
            uA.u = uOldValue;
            uB.u = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn & ~X86_EFL_ZF)
                || *g_pu64 != uExpect
                || uA.u != uExpect)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue); /* the new-value operand must be left untouched */

            if (iTest == 2 && g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0)
            {
                uint32_t cIterations = EstimateIterations(_64K, CmpXchg8bBench(_64K, s_aFuncs[iFn].pfn,
                                                                               uOldValue, uOldValue, uNewValue, fEflIn));
                uint64_t cNsRealRun  = CmpXchg8bBench(cIterations, s_aFuncs[iFn].pfn, uOldValue, uOldValue, uNewValue, fEflIn);
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
                             "%s-positive", s_aFuncs[iFn].pszName);

                cIterations = EstimateIterations(_64K, CmpXchg8bBench(_64K, s_aFuncs[iFn].pfn,
                                                                      ~uOldValue, uOldValue, uNewValue, fEflIn));
                cNsRealRun  = CmpXchg8bBench(cIterations, s_aFuncs[iFn].pfn, ~uOldValue, uOldValue, uNewValue, fEflIn);
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
                             "%s-negative", s_aFuncs[iFn].pszName);
            }
        }
    }
}
2360
/**
 * Tests the cmpxchg16b workers (plain, locked and - except on ARM64 - the
 * C fallback), running a positive (match: ZF set, new value stored) and a
 * negative (mismatch: ZF clear, old memory value loaded) round per iteration.
 * Skipped on AMD64 hosts without the CX16 CPUID feature when using assembly.
 */
static void CmpXchg16bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG16B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg16b",          iemAImpl_cmpxchg16b },
        { "cmpxchg16b_locked",   iemAImpl_cmpxchg16b_locked },
#if !defined(RT_ARCH_ARM64)
        { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
#endif
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
        if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
        {
            RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
            continue;
        }
#endif
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            RTUINT128U const uOldValue = RandU128();
            RTUINT128U const uNewValue = RandU128();

            /* positive test. */
            RTUINT128U uA, uB;
            uB = uNewValue;
            uA = uOldValue;
            *g_pu128 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl   = fEflIn;
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn | X86_EFL_ZF)
                || g_pu128->s.Lo != uNewValue.s.Lo
                || g_pu128->s.Hi != uNewValue.s.Hi
                || uA.s.Lo != uOldValue.s.Lo
                || uA.s.Hi != uOldValue.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
                             EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi); /* new-value operand preserved */

            /* negative */
            RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
            *g_pu128 = uExpect;
            uA = uOldValue;
            uB = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if (   fEfl != (fEflIn & ~X86_EFL_ZF)
                || g_pu128->s.Lo != uExpect.s.Lo
                || g_pu128->s.Hi != uExpect.s.Hi
                || uA.s.Lo != uExpect.s.Lo
                || uA.s.Hi != uExpect.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
                             EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi); /* new-value operand preserved */
        }
    }
}
2437
2438
2439/*
2440 * Double shifts.
2441 *
2442 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
2443 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits ShiftDblUxxGenerate(), producing binary test data for the shld/shrd
 * subtests whose eflags flavour is native or matches the host
 * (g_idxCpuEflFlavour), plus the matching ShiftDblUxxDumpAll().  The shift
 * count (uMisc) is masked with a_cBits * 4 - 1 so counts well beyond the
 * operand width are covered too.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static RTEXITCODE ShiftDblU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(ShiftDblU ## a_cBits, a_aSubTests)

#else
/* Test data generation disabled: the macro expands to nothing. */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2476
2477#define TEST_SHIFT_DBL(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2478TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
2479\
2480static a_SubTestType a_aSubTests[] = \
2481{ \
2482 ENTRY_BIN_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2483 ENTRY_BIN_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2484 ENTRY_BIN_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2485 ENTRY_BIN_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2486}; \
2487\
2488GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2489\
2490static uint64_t ShiftDblU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn, a_TestType const *pEntry) \
2491{ \
2492 uint32_t const fEflIn = pEntry->fEflIn; \
2493 a_uType const uDstIn = pEntry->uDstIn; \
2494 a_uType const uSrcIn = pEntry->uSrcIn; \
2495 a_uType const cShift = pEntry->uMisc; \
2496 cIterations /= 4; \
2497 RTThreadYield(); \
2498 uint64_t const nsStart = RTTimeNanoTS(); \
2499 for (uint32_t i = 0; i < cIterations; i++) \
2500 { \
2501 uint32_t fBenchEfl = fEflIn; \
2502 a_uType uBenchDst = uDstIn; \
2503 pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2504 \
2505 fBenchEfl = fEflIn; \
2506 uBenchDst = uDstIn; \
2507 pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2508 \
2509 fBenchEfl = fEflIn; \
2510 uBenchDst = uDstIn; \
2511 pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2512 \
2513 fBenchEfl = fEflIn; \
2514 uBenchDst = uDstIn; \
2515 pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2516 } \
2517 return RTTimeNanoTS() - nsStart; \
2518} \
2519\
2520static void ShiftDblU ## a_cBits ## Test(void) \
2521{ \
2522 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2523 { \
2524 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2525 continue; \
2526 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2527 uint32_t const cTests = a_aSubTests[iFn].cTests; \
2528 PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2529 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2530 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2531 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2532 { \
2533 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2534 { \
2535 uint32_t fEfl = paTests[iTest].fEflIn; \
2536 a_uType uDst = paTests[iTest].uDstIn; \
2537 pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
2538 if ( uDst != paTests[iTest].uDstOut \
2539 || fEfl != paTests[iTest].fEflOut) \
2540 RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
2541 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
2542 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
2543 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2544 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
2545 else \
2546 { \
2547 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2548 *g_pfEfl = paTests[iTest].fEflIn; \
2549 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
2550 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2551 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2552 } \
2553 } \
2554 \
2555 /* Benchmark if all succeeded. */ \
2556 if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
2557 { \
2558 uint32_t const iTest = cTests / 2; \
2559 uint32_t const cIterations = EstimateIterations(_64K, ShiftDblU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
2560 uint64_t const cNsRealRun = ShiftDblU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
2561 RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
2562 "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
2563 } \
2564 \
2565 /* Next variation is native. */ \
2566 pfn = a_aSubTests[iFn].pfnNative; \
2567 } \
2568 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
2569 } \
2570}
2571TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
2572TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
2573TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
2574
2575#ifdef TSTIEMAIMPL_WITH_GENERATOR
2576static RTEXITCODE ShiftDblGenerate(uint32_t cTests, const char * const * papszNameFmts)
2577{
2578 RTEXITCODE rcExit = ShiftDblU16Generate(cTests, papszNameFmts);
2579 if (rcExit == RTEXITCODE_SUCCESS)
2580 rcExit = ShiftDblU32Generate(cTests, papszNameFmts);
2581 if (rcExit == RTEXITCODE_SUCCESS)
2582 rcExit = ShiftDblU64Generate(cTests, papszNameFmts);
2583 return rcExit;
2584}
2585
2586static RTEXITCODE ShiftDblDumpAll(const char * const * papszNameFmts)
2587{
2588 RTEXITCODE rcExit = ShiftDblU16DumpAll(papszNameFmts);
2589 if (rcExit == RTEXITCODE_SUCCESS)
2590 rcExit = ShiftDblU32DumpAll(papszNameFmts);
2591 if (rcExit == RTEXITCODE_SUCCESS)
2592 rcExit = ShiftDblU64DumpAll(papszNameFmts);
2593 return rcExit;
2594}
2595#endif
2596
/** Runs the 16-, 32- and 64-bit double-shift (shld/shrd) subtests. */
static void ShiftDblTest(void)
{
    ShiftDblU16Test();
    ShiftDblU32Test();
    ShiftDblU64Test();
}
2603
2604
2605/*
2606 * Unary operators.
2607 *
2608 * Note! We use BINUxx_TEST_T ignoreing uSrcIn and uMisc.
2609 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits UnaryUxxGenerate(), producing binary test data for every entry in
 * g_aUnaryUxx (uSrcIn and uMisc are unused by unary ops and written as zero),
 * plus the matching UnaryUxxDumpAll().
 */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
static RTEXITCODE UnaryU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aUnaryU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits(); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = 0; \
            Test.uMisc     = 0; \
            g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(UnaryU ## a_cBits, g_aUnaryU ## a_cBits)
#else
/* Test data generation disabled: the macro expands to nothing. */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
#endif
2638
/**
 * Emits everything needed to test the unary operations (inc, dec, not, neg,
 * incl. their _locked variants) at one operand width: the subtest table
 * (a_aSubTests), the optional binary test data generator (via GEN_UNARY), a
 * benchmark worker and the UnaryU<N>Test() driver that replays pre-generated
 * records and checks the EFLAGS and destination results.
 */
#define TEST_UNARY(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(inc_u ## a_cBits), \
    ENTRY_BIN(inc_u ## a_cBits ## _locked), \
    ENTRY_BIN(dec_u ## a_cBits), \
    ENTRY_BIN(dec_u ## a_cBits ## _locked), \
    ENTRY_BIN(not_u ## a_cBits), \
    ENTRY_BIN(not_u ## a_cBits ## _locked), \
    ENTRY_BIN(neg_u ## a_cBits), \
    ENTRY_BIN(neg_u ## a_cBits ## _locked), \
}; \
\
GEN_UNARY(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType) \
\
/* Benchmark worker; the body is unrolled 4x, hence the iteration count is divided by 4. */ \
static uint64_t UnaryU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLUNARYU ## a_cBits pfn, a_TestType const *pEntry) \
{ \
    uint32_t const fEflIn = pEntry->fEflIn; \
    a_uType const uDstIn = pEntry->uDstIn; \
    cIterations /= 4; \
    RTThreadYield(); \
    uint64_t const nsStart = RTTimeNanoTS(); \
    for (uint32_t i = 0; i < cIterations; i++) \
    { \
        uint32_t fBenchEfl = fEflIn; \
        a_uType uBenchDst = uDstIn; \
        pfn(&uBenchDst, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, &fBenchEfl); \
    } \
    return RTTimeNanoTS() - nsStart; \
} \
\
static void UnaryU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        PFNIEMAIMPLUNARYU ## a_cBits const pfn = a_aSubTests[iFn].pfn; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            uint32_t fEfl = paTests[iTest].fEflIn; \
            a_uType uDst = paTests[iTest].uDstIn; \
            pfn(&uDst, &fEfl); \
            if (   uDst != paTests[iTest].uDstOut \
                || fEfl != paTests[iTest].fEflOut) \
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                             iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
                             fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                             EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
            else \
            { \
                /* Success: repeat the call via the global heap buffers. */ \
                *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                *g_pfEfl = paTests[iTest].fEflIn; \
                pfn(g_pu ## a_cBits, g_pfEfl); \
                RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
            } \
        } \
        \
        /* Benchmark if all tests succeeded. */ \
        if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
        { \
            uint32_t const iTest = cTests / 2; \
            uint32_t const cIterations = EstimateIterations(_64K, UnaryU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
            uint64_t const cNsRealRun = UnaryU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
            RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, "%s", a_aSubTests[iFn].pszName); \
        } \
        \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
/* Instantiate the unary test workers for all four operand widths. */
TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T, g_aUnaryU8)
TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T, g_aUnaryU16)
TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T, g_aUnaryU32)
TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T, g_aUnaryU64)
2729
2730#ifdef TSTIEMAIMPL_WITH_GENERATOR
2731static RTEXITCODE UnaryGenerate(uint32_t cTests, const char * const * papszNameFmts)
2732{
2733 RTEXITCODE rcExit = UnaryU8Generate(cTests, papszNameFmts);
2734 if (rcExit == RTEXITCODE_SUCCESS)
2735 rcExit = UnaryU16Generate(cTests, papszNameFmts);
2736 if (rcExit == RTEXITCODE_SUCCESS)
2737 rcExit = UnaryU32Generate(cTests, papszNameFmts);
2738 if (rcExit == RTEXITCODE_SUCCESS)
2739 rcExit = UnaryU64Generate(cTests, papszNameFmts);
2740 return rcExit;
2741}
2742
2743static RTEXITCODE UnaryDumpAll(const char * const * papszNameFmts)
2744{
2745 RTEXITCODE rcExit = UnaryU8DumpAll(papszNameFmts);
2746 if (rcExit == RTEXITCODE_SUCCESS)
2747 rcExit = UnaryU16DumpAll(papszNameFmts);
2748 if (rcExit == RTEXITCODE_SUCCESS)
2749 rcExit = UnaryU32DumpAll(papszNameFmts);
2750 if (rcExit == RTEXITCODE_SUCCESS)
2751 rcExit = UnaryU64DumpAll(papszNameFmts);
2752 return rcExit;
2753}
2754#endif
2755
2756static void UnaryTest(void)
2757{
2758 UnaryU8Test();
2759 UnaryU16Test();
2760 UnaryU32Test();
2761 UnaryU64Test();
2762}
2763
2764
2765/*
2766 * Shifts.
2767 *
2768 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2769 */
2770#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits the binary test data generator ShiftU<N>Generate() for the shift and
 * rotate helpers, plus the matching DumpAll function.  Each random input is
 * written twice: once as-is and once with the live EFLAGS bits inverted.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static RTEXITCODE ShiftU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = 0; \
            Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            Test.fEflOut = a_aSubTests[iFn].pfnNative(Test.fEflIn, &Test.uDstOut, Test.uMisc); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
            \
            /* Second record: same inputs but with the live EFLAGS bits inverted. */ \
            Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.uDstOut = Test.uDstIn; \
            Test.fEflOut = a_aSubTests[iFn].pfnNative(Test.fEflIn, &Test.uDstOut, Test.uMisc); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(ShiftU ## a_cBits, a_aSubTests)
2802#else
2803# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2804#endif
2805
/**
 * Emits everything needed to test the shift and rotate helpers (rol, ror,
 * rcl, rcr, shl, shr, sar) at one operand width: the subtest table with AMD
 * and Intel EFLAGS flavour entries, the optional generator (GEN_SHIFT), a
 * benchmark worker and the ShiftU<N>Test() driver.
 */
#define TEST_SHIFT(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN_AMD( rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_AMD( ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_BIN_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_BIN_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_BIN_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_BIN_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_BIN_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_BIN_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
}; \
\
GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
/* Benchmark worker; the body is unrolled 4x, hence the iteration count is divided by 4. */ \
static uint64_t ShiftU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLSHIFTU ## a_cBits pfn, a_TestType const *pEntry) \
{ \
    uint32_t const fEflIn = pEntry->fEflIn; \
    a_uType const uDstIn = pEntry->uDstIn; \
    a_uType const cShift = pEntry->uMisc; \
    cIterations /= 4; \
    RTThreadYield(); \
    uint64_t const nsStart = RTTimeNanoTS(); \
    for (uint32_t i = 0; i < cIterations; i++) \
    { \
        a_uType uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, cShift); \
        \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, cShift); \
        \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, cShift); \
        \
        uBenchDst = uDstIn; \
        pfn(fEflIn, &uBenchDst, cShift); \
    } \
    return RTTimeNanoTS() - nsStart; \
} \
\
static void ShiftU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        /* First variation is the default implementation; the second (if present) the native one. */ \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                a_uType uDst = paTests[iTest].uDstIn; \
                uint32_t fEflOut = pfn(paTests[iTest].fEflIn, &uDst, paTests[iTest].uMisc); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEflOut != paTests[iTest].fEflOut ) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
                                 fEflOut, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEflOut, paTests[iTest].fEflOut)); \
                else \
                { \
                    /* Success: repeat the call via the global heap buffers. */ \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    fEflOut = pfn(paTests[iTest].fEflIn, g_pu ## a_cBits, paTests[iTest].uMisc); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, fEflOut == paTests[iTest].fEflOut); \
                } \
            } \
            \
            /* Benchmark if all succeeded. */ \
            if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
            { \
                uint32_t const iTest = cTests / 2; \
                uint32_t const cIterations = EstimateIterations(_64K, ShiftU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
                uint64_t const cNsRealRun = ShiftU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
                             "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
            } \
            \
            /* Next variation is native. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
/* Instantiate the shift/rotate tests for all four operand widths. */
TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2906
2907#ifdef TSTIEMAIMPL_WITH_GENERATOR
2908static RTEXITCODE ShiftGenerate(uint32_t cTests, const char * const * papszNameFmts)
2909{
2910 RTEXITCODE rcExit = ShiftU8Generate(cTests, papszNameFmts);
2911 if (rcExit == RTEXITCODE_SUCCESS)
2912 rcExit = ShiftU16Generate(cTests, papszNameFmts);
2913 if (rcExit == RTEXITCODE_SUCCESS)
2914 rcExit = ShiftU32Generate(cTests, papszNameFmts);
2915 if (rcExit == RTEXITCODE_SUCCESS)
2916 rcExit = ShiftU64Generate(cTests, papszNameFmts);
2917 return rcExit;
2918}
2919
2920static RTEXITCODE ShiftDumpAll(const char * const * papszNameFmts)
2921{
2922 RTEXITCODE rcExit = ShiftU8DumpAll(papszNameFmts);
2923 if (rcExit == RTEXITCODE_SUCCESS)
2924 rcExit = ShiftU16DumpAll(papszNameFmts);
2925 if (rcExit == RTEXITCODE_SUCCESS)
2926 rcExit = ShiftU32DumpAll(papszNameFmts);
2927 if (rcExit == RTEXITCODE_SUCCESS)
2928 rcExit = ShiftU64DumpAll(papszNameFmts);
2929 return rcExit;
2930}
2931#endif
2932
2933static void ShiftTest(void)
2934{
2935 ShiftU8Test();
2936 ShiftU16Test();
2937 ShiftU32Test();
2938 ShiftU64Test();
2939}
2940
2941
2942/*
2943 * Multiplication and division.
2944 *
 * Note! The 8-bit functions have a different format, so we need to duplicate things.
2946 * Note! Currently ignoring undefined bits.
2947 */
2948
2949/* U8 */
2950#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Hand-picked idiv_u8 boundary cases (quotients just inside/outside the
 * signed 8-bit range).  An rc of -128 tells the generator to keep whatever
 * the reference implementation returned; 0 and -1 override it.
 */
static const MULDIVU8_TEST_T g_aFixedTests_idiv_u8[] =
{
    /* efl in, efl out, uDstIn, uDstOut, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
    { UINT32_MAX, 0, 0x8000, 0, 0xc7, -1 }, /* -32768 / -57 = #DE (574.8771929824...) */
    { UINT32_MAX, 0, 0x8000, 0, 0xdd, -128 }, /* -32768 / -35 = #DE (936.2285714285...) */
    { UINT32_MAX, 0, 0x7f00, 0, 0x7f, -1 }, /* 0x7f00 / 0x7f = #DE (0x100) */
    { UINT32_MAX, 0, 0x3f80, 0, 0x7f, -1 }, /* 0x3F80 / 0x7f = #DE (0x80) */
    { UINT32_MAX, 0, 0x3f7f, 0, 0x7f, 0 }, /* 0x3F7F / 0x7f = 127.992125984... */
    { UINT32_MAX, 0, 0xc000, 0, 0x80, -1 }, /* -16384 / -128 = #DE (0x80) */
    { UINT32_MAX, 0, 0xc001, 0, 0x80, 0 }, /* -16383 / -128 = 127.9921875 */
};
#endif
TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
/** Subtest table for the 8-bit mul/imul/div/idiv workers.  The last (uExtra)
 *  argument holds the EFLAGS bits the test ignores when comparing results. */
static INT_MULDIV_U8_T g_aMulDivU8[] =
{
    ENTRY_BIN_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                     X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_BIN_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_BIN_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                     X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_BIN_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_BIN_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_BIN_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_BIN_FIX_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_BIN_FIX_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
};
2977
2978#ifdef TSTIEMAIMPL_WITH_GENERATOR
2979DUMP_ALL_FN(MulDivU8, g_aMulDivU8)
2980static RTEXITCODE MulDivU8Generate(uint32_t cTests, const char * const * papszNameFmts)
2981{
2982 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2983 {
2984 if ( g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
2985 && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
2986 continue;
2987 IEMBINARYOUTPUT BinOut; \
2988 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aMulDivU8[iFn]), RTEXITCODE_FAILURE); \
2989 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2990 {
2991 MULDIVU8_TEST_T Test;
2992 Test.fEflIn = RandEFlags();
2993 Test.fEflOut = Test.fEflIn;
2994 Test.uDstIn = RandU16Dst(iTest);
2995 Test.uDstOut = Test.uDstIn;
2996 Test.uSrcIn = RandU8Src(iTest);
2997 Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
2998 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
2999 }
3000 for (uint32_t iTest = 0; iTest < g_aMulDivU8[iFn].cFixedTests; iTest++)
3001 {
3002 MULDIVU8_TEST_T Test;
3003 Test.fEflIn = g_aMulDivU8[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags()
3004 : g_aMulDivU8[iFn].paFixedTests[iTest].fEflIn;
3005 Test.fEflOut = Test.fEflIn;
3006 Test.uDstIn = g_aMulDivU8[iFn].paFixedTests[iTest].uDstIn;
3007 Test.uDstOut = Test.uDstIn;
3008 Test.uSrcIn = g_aMulDivU8[iFn].paFixedTests[iTest].uSrcIn;
3009 Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
3010 if (g_aMulDivU8[iFn].paFixedTests[iTest].rc == 0 || g_aMulDivU8[iFn].paFixedTests[iTest].rc == -1)
3011 Test.rc = g_aMulDivU8[iFn].paFixedTests[iTest].rc;
3012 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
3013 }
3014 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
3015 }
3016 return RTEXITCODE_SUCCESS;
3017}
3018#endif
3019
/**
 * Benchmarks one 8-bit mul/div worker.
 *
 * @returns Elapsed nanoseconds for the whole run.
 * @param   cIterations Total number of calls to make; the loop body is
 *                      unrolled 4x, hence the division by 4 below.
 * @param   pfn         The worker function to benchmark.
 * @param   pEntry      Test record supplying the input values.
 */
static uint64_t MulDivU8Bench(uint32_t cIterations, PFNIEMAIMPLMULDIVU8 pfn, MULDIVU8_TEST_T const *pEntry)
{
    uint32_t const fEflIn = pEntry->fEflIn;
    uint16_t const uDstIn = pEntry->uDstIn;
    uint8_t const uSrcIn = pEntry->uSrcIn;
    cIterations /= 4;
    RTThreadYield();
    uint64_t const nsStart = RTTimeNanoTS();
    for (uint32_t i = 0; i < cIterations; i++)
    {
        uint32_t fBenchEfl = fEflIn;
        uint16_t uBenchDst = uDstIn;
        pfn(&uBenchDst, uSrcIn, &fBenchEfl);

        fBenchEfl = fEflIn;
        uBenchDst = uDstIn;
        pfn(&uBenchDst, uSrcIn, &fBenchEfl);

        fBenchEfl = fEflIn;
        uBenchDst = uDstIn;
        pfn(&uBenchDst, uSrcIn, &fBenchEfl);

        fBenchEfl = fEflIn;
        uBenchDst = uDstIn;
        pfn(&uBenchDst, uSrcIn, &fBenchEfl);
    }
    return RTTimeNanoTS() - nsStart;
}
3048
3049static void MulDivU8Test(void)
3050{
3051 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
3052 {
3053 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aMulDivU8[iFn]))
3054 continue;
3055 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
3056 uint32_t const cTests = g_aMulDivU8[iFn].cTests;
3057 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
3058 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
3059 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]);
3060 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3061 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3062 {
3063 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
3064 {
3065 uint32_t fEfl = paTests[iTest].fEflIn;
3066 uint16_t uDst = paTests[iTest].uDstIn;
3067 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
3068 if ( uDst != paTests[iTest].uDstOut
3069 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
3070 || rc != paTests[iTest].rc)
3071 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
3072 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
3073 "%sexpected %#08x %#06RX16 %d%s\n",
3074 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
3075 iVar ? " " : "", fEfl, uDst, rc,
3076 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
3077 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
3078 else
3079 {
3080 *g_pu16 = paTests[iTest].uDstIn;
3081 *g_pfEfl = paTests[iTest].fEflIn;
3082 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
3083 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
3084 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
3085 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
3086 }
3087 }
3088
3089 /* Benchmark if all succeeded. */
3090 if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0)
3091 {
3092 uint32_t const iTest = cTests / 2;
3093 uint32_t const cIterations = EstimateIterations(_64K, MulDivU8Bench(_64K, pfn, &paTests[iTest]));
3094 uint64_t const cNsRealRun = MulDivU8Bench(cIterations, pfn, &paTests[iTest]);
3095 RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
3096 "%s%s", g_aMulDivU8[iFn].pszName, iVar ? "-native" : "");
3097 }
3098
3099 /* Next variation is native. */
3100 pfn = g_aMulDivU8[iFn].pfnNative;
3101 }
3102 FREE_DECOMPRESSED_TESTS(g_aMulDivU8[iFn]);
3103 }
3104}
3105
3106#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Fixed idiv boundary cases for the 16, 32 and 64 bit widths.  An rc of -128
 * tells the generator to keep whatever the reference implementation
 * returned; 0 and -1 override it.
 */
static const MULDIVU16_TEST_T g_aFixedTests_idiv_u16[] =
{
    /*                               low         high */
    /* --- eflags ---, -- uDst1 --, -- uDst2 --, */
    /* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
    { UINT32_MAX, 0, 0x0000, 0, 0x8000, 0, 0xc004, -1 }, /* -2147483648 /-16380 = #DE (131104.00781...) */
    { UINT32_MAX, 0, 0xffff, 0, 0x7fff, 0, 0x7fff, -1 }, /* 2147483647 / 32767 = #DE (65538.000030...) */
    { UINT32_MAX, 0, 0x8000, 0, 0x3fff, 0, 0x7fff, -1 }, /* 0x3fff8000 / 0x7fff = #DE (0x8000) */
    { UINT32_MAX, 0, 0x7fff, 0, 0x3fff, 0, 0x7fff, 0 }, /* 0x3fff7fff / 0x7fff = 32767.99996948... */
    { UINT32_MAX, 0, 0x0000, 0, 0xc000, 0, 0x8000, -1 }, /* -1073741824 / -32768 = #DE (0x8000) */
    { UINT32_MAX, 0, 0x0001, 0, 0xc000, 0, 0x8000, 0 }, /* -1073741823 / -32768 = 32767.999969482421875 */
};

/** Same boundary cases scaled up to 32-bit operands. */
static const MULDIVU32_TEST_T g_aFixedTests_idiv_u32[] =
{
    /*                                   low             high */
    /* --- eflags ---, ---- uDst1 ----, ---- uDst2 ----, */
    /* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
    { UINT32_MAX, 0, 0x00000000, 0, 0x80000000, 0, 0xc0000004, -1 },
    { UINT32_MAX, 0, 0xffffffff, 0, 0x7fffffff, 0, 0x7fffffff, -1 },
    { UINT32_MAX, 0, 0x80000000, 0, 0x3fffffff, 0, 0x7fffffff, -1 },
    { UINT32_MAX, 0, 0x7fffffff, 0, 0x3fffffff, 0, 0x7fffffff, 0 },
    { UINT32_MAX, 0, 0x00000000, 0, 0xc0000000, 0, 0x80000000, -1 },
    { UINT32_MAX, 0, 0x00000001, 0, 0xc0000000, 0, 0x80000000, 0 },
};

/** Same boundary cases scaled up to 64-bit operands. */
static const MULDIVU64_TEST_T g_aFixedTests_idiv_u64[] =
{
    /*                                           low                     high */
    /* --- eflags ---, -------- uDst1 --------, -------- uDst2 --------, */
    /* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
    { UINT32_MAX, 0, 0x0000000000000000, 0, 0x8000000000000000, 0, 0xc000000000000004, -1 },
    { UINT32_MAX, 0, 0xffffffffffffffff, 0, 0x7fffffffffffffff, 0, 0x7fffffffffffffff, -1 },
    { UINT32_MAX, 0, 0x8000000000000000, 0, 0x3fffffffffffffff, 0, 0x7fffffffffffffff, -1 },
    { UINT32_MAX, 0, 0x7fffffffffffffff, 0, 0x3fffffffffffffff, 0, 0x7fffffffffffffff, 0 },
    { UINT32_MAX, 0, 0x0000000000000000, 0, 0xc000000000000000, 0, 0x8000000000000000, -1 },
    { UINT32_MAX, 0, 0x0000000000000001, 0, 0xc000000000000000, 0, 0x8000000000000000, 0 },
};
3145
/**
 * Emits MulDivU<N>Generate() producing binary test data for mul/imul/div/idiv
 * at the given width, covering random inputs plus the fixed boundary cases
 * from the g_aFixedTests_idiv_uNN tables, and the matching DumpAll function.
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
DUMP_ALL_FN(MulDivU ## a_cBits, a_aSubTests) \
static RTEXITCODE MulDivU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        IEMBINARYOUTPUT BinOut; \
        a_TestType Test; \
        RT_ZERO(Test); /* 64-bit variant contains alignment padding */ \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst1Out = Test.uDst1In; \
            Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst2Out = Test.uDst2In; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        /* Fixed boundary cases; an rc of -128 means keep what the reference returned. */ \
        for (uint32_t iTest = 0; iTest < a_aSubTests[iFn].cFixedTests; iTest++ ) \
        { \
            Test.fEflIn = a_aSubTests[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags() \
                        : a_aSubTests[iFn].paFixedTests[iTest].fEflIn; \
            Test.fEflOut = Test.fEflIn; \
            Test.uDst1In = a_aSubTests[iFn].paFixedTests[iTest].uDst1In; \
            Test.uDst1Out = Test.uDst1In; \
            Test.uDst2In = a_aSubTests[iFn].paFixedTests[iTest].uDst2In; \
            Test.uDst2Out = Test.uDst2In; \
            Test.uSrcIn = a_aSubTests[iFn].paFixedTests[iTest].uSrcIn; \
            Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            if (a_aSubTests[iFn].paFixedTests[iTest].rc == 0 || a_aSubTests[iFn].paFixedTests[iTest].rc == -1) \
                Test.rc = a_aSubTests[iFn].paFixedTests[iTest].rc; \
            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
}
3190#else
3191# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
3192#endif
3193
/**
 * Emits everything needed to test the mul/imul/div/idiv workers at one
 * operand width (16/32/64): the subtest table with AMD and Intel EFLAGS
 * flavour entries (the last argument is uExtra = EFLAGS bits to ignore), the
 * optional generator (GEN_MULDIV), a benchmark worker and the
 * MulDivU<N>Test() driver.
 */
#define TEST_MULDIV(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
static a_SubTestType a_aSubTests [] = \
{ \
    ENTRY_BIN_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_BIN_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_BIN_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_BIN_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_BIN_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_BIN_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_BIN_FIX_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_BIN_FIX_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
}; \
\
GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
/* Benchmark worker; the body is unrolled 4x, hence the iteration count is divided by 4. */ \
static uint64_t MulDivU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLMULDIVU ## a_cBits pfn, a_TestType const *pEntry) \
{ \
    uint32_t const fEflIn = pEntry->fEflIn; \
    a_uType const uDst1In = pEntry->uDst1In; \
    a_uType const uDst2In = pEntry->uDst2In; \
    a_uType const uSrcIn = pEntry->uSrcIn; \
    cIterations /= 4; \
    RTThreadYield(); \
    uint64_t const nsStart = RTTimeNanoTS(); \
    for (uint32_t i = 0; i < cIterations; i++) \
    { \
        uint32_t fBenchEfl = fEflIn; \
        a_uType uBenchDst1 = uDst1In; \
        a_uType uBenchDst2 = uDst2In; \
        pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst1 = uDst1In; \
        uBenchDst2 = uDst2In; \
        pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst1 = uDst1In; \
        uBenchDst2 = uDst2In; \
        pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst1 = uDst1In; \
        uBenchDst2 = uDst2In; \
        pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
    } \
    return RTTimeNanoTS() - nsStart; \
} \
\
static void MulDivU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
        PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        /* First variation is the default implementation; the second (if present) the native one. */ \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType uDst1 = paTests[iTest].uDst1In; \
                a_uType uDst2 = paTests[iTest].uDst2In; \
                int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst1 != paTests[iTest].uDst1Out \
                    || uDst2 != paTests[iTest].uDst2Out \
                    || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
                    || rc != paTests[iTest].rc) \
                    RTTestFailed(g_hTest, "#%04u%s: efl=%#010x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
                                          " -> efl=%#010x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
                                          " expected %#010x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
                                 iTest, iVar == 0 ? " " : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
                                 fEfl, uDst1, uDst2, rc, \
                                 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
                                 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
                                 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
                                 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
                else \
                { \
                    /* Success: repeat the call via the global heap buffers. */ \
                    *g_pu ## a_cBits = paTests[iTest].uDst1In; \
                    *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
                    RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
                    RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
                } \
            } \
            \
            /* Benchmark if all succeeded. */ \
            if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
            { \
                uint32_t const iTest = cTests / 2; \
                uint32_t const cIterations = EstimateIterations(_64K, MulDivU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
                uint64_t const cNsRealRun = MulDivU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
                             "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
            } \
            \
            /* Next variation is native. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
/* Instantiate for the 16, 32 and 64 bit widths; the 8-bit functions have a
   different signature and are hand-written above. */
TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
3310
3311#ifdef TSTIEMAIMPL_WITH_GENERATOR
3312static RTEXITCODE MulDivGenerate(uint32_t cTests, const char * const * papszNameFmts)
3313{
3314 RTEXITCODE rcExit = MulDivU8Generate(cTests, papszNameFmts);
3315 if (rcExit == RTEXITCODE_SUCCESS)
3316 rcExit = MulDivU16Generate(cTests, papszNameFmts);
3317 if (rcExit == RTEXITCODE_SUCCESS)
3318 rcExit = MulDivU32Generate(cTests, papszNameFmts);
3319 if (rcExit == RTEXITCODE_SUCCESS)
3320 rcExit = MulDivU64Generate(cTests, papszNameFmts);
3321 return rcExit;
3322}
3323
3324static RTEXITCODE MulDivDumpAll(const char * const * papszNameFmts)
3325{
3326 RTEXITCODE rcExit = MulDivU8DumpAll(papszNameFmts);
3327 if (rcExit == RTEXITCODE_SUCCESS)
3328 rcExit = MulDivU16DumpAll(papszNameFmts);
3329 if (rcExit == RTEXITCODE_SUCCESS)
3330 rcExit = MulDivU32DumpAll(papszNameFmts);
3331 if (rcExit == RTEXITCODE_SUCCESS)
3332 rcExit = MulDivU64DumpAll(papszNameFmts);
3333 return rcExit;
3334}
3335#endif
3336
3337static void MulDivTest(void)
3338{
3339 MulDivU8Test();
3340 MulDivU16Test();
3341 MulDivU32Test();
3342 MulDivU64Test();
3343}
3344
3345
3346/*
3347 * BSWAP
3348 */
/**
 * Tests the bswap_u16/u32/u64 helpers with fixed bit patterns.
 */
static void BswapTest(void)
{
    if (SubTestAndCheckIfEnabled("bswap_u16"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        /* Disabled alternative: low word byte-swapped in place. */
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        /* Active expectation: the low word is zeroed.  NOTE(review): this
           presumably models the undefined 16-bit BSWAP behavior - confirm
           against the SDM / the chosen IEM implementation. */
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
        *g_pu32 = UINT32_C(0xffff1122);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
    }

    if (SubTestAndCheckIfEnabled("bswap_u32"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u32(g_pu32);
        RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
    }

    if (SubTestAndCheckIfEnabled("bswap_u64"))
    {
        *g_pu64 = UINT64_C(0x0123456789abcdef);
        iemAImpl_bswap_u64(g_pu64);
        RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
    }
}
3383
3384
3385
3386/*********************************************************************************************************************************
3387* Floating point (x87 style) *
3388*********************************************************************************************************************************/
3389
3390/*
3391 * FPU constant loading.
3392 */
TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);

/** Subtest table: one entry per FPU load-constant instruction. */
static FPU_LD_CONST_T g_aFpuLdConst[] =
{
    ENTRY_BIN(fld1),
    ENTRY_BIN(fldl2t),
    ENTRY_BIN(fldl2e),
    ENTRY_BIN(fldpi),
    ENTRY_BIN(fldlg2),
    ENTRY_BIN(fldln2),
    ENTRY_BIN(fldz),
};
3405
3406#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates binary test data for the FPU load-constant helpers.
 *
 * Each outer iteration draws a random FCW/FSW pair and then produces four
 * records, one per x87 rounding mode, which is why iTest advances by 4.
 */
static RTEXITCODE FpuLdConstGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
    {
        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuLdConst[iFn]), RTEXITCODE_FAILURE);
        for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* Only the rounding-control bits change between the four inner iterations. */
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdConst[iFn].pfn(&State, &Res);
                FPU_LD_CONST_TEST_T const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result };
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuLdConst, g_aFpuLdConst)
3434#endif
3435
3436static void FpuLdConstTest(void)
3437{
3438 /*
3439 * Inputs:
3440 * - FSW: C0, C1, C2, C3
3441 * - FCW: Exception masks, Precision control, Rounding control.
3442 *
3443 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
3444 */
3445 X86FXSTATE State;
3446 RT_ZERO(State);
3447 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
3448 {
3449 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdConst[iFn]))
3450 continue;
3451
3452 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
3453 uint32_t const cTests = g_aFpuLdConst[iFn].cTests;
3454 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
3455 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
3456 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3457 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3458 {
3459 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3460 {
3461 State.FCW = paTests[iTest].fFcw;
3462 State.FSW = paTests[iTest].fFswIn;
3463 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3464 pfn(&State, &Res);
3465 if ( Res.FSW != paTests[iTest].fFswOut
3466 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
3467 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
3468 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3469 Res.FSW, FormatR80(&Res.r80Result),
3470 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
3471 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3472 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
3473 FormatFcw(paTests[iTest].fFcw) );
3474 }
3475 pfn = g_aFpuLdConst[iFn].pfnNative;
3476 }
3477
3478 FREE_DECOMPRESSED_TESTS(g_aFpuLdConst[iFn]);
3479 }
3480}
3481
3482
3483/*
3484 * Load floating point values from memory.
3485 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits FpuLdR<bits>Generate() and (via DUMP_ALL_FN) the dump function for a
 * fld-from-memory worker table: random FCW/FSW plus a random input value,
 * iterated over all four rounding-control modes (four records per input).
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static RTEXITCODE FpuLdR ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                a_TestType const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal }; \
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuLdR ## a_cBits, a_aSubTests)
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
3519
/**
 * Instantiates the function-pointer typedefs, worker table, optional generator
 * (GEN_FPU_LOAD) and the FpuLdR<bits>Test() driver for one fld-from-memory
 * variant (80-, 64- or 32-bit source).
 */
#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(RT_CONCAT(fld_r80_from_r,a_cBits)) \
}; \
GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
\
static void FpuLdR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_rdTypeIn const InVal = paTests[iTest].InVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &InVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR ## a_cBits(&paTests[iTest].InVal), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}

/* Instantiate the 80-, 64- and 32-bit floating point memory load tests. */
TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
3577
3578#ifdef TSTIEMAIMPL_WITH_GENERATOR
3579static RTEXITCODE FpuLdMemGenerate(uint32_t cTests, const char * const *papszNameFmts)
3580{
3581 RTEXITCODE rcExit = FpuLdR80Generate(cTests, papszNameFmts);
3582 if (rcExit == RTEXITCODE_SUCCESS)
3583 rcExit = FpuLdR64Generate(cTests, papszNameFmts);
3584 if (rcExit == RTEXITCODE_SUCCESS)
3585 rcExit = FpuLdR32Generate(cTests, papszNameFmts);
3586 return rcExit;
3587}
3588
3589static RTEXITCODE FpuLdMemDumpAll(const char * const *papszNameFmts)
3590{
3591 RTEXITCODE rcExit = FpuLdR80DumpAll(papszNameFmts);
3592 if (rcExit == RTEXITCODE_SUCCESS)
3593 rcExit = FpuLdR64DumpAll(papszNameFmts);
3594 if (rcExit == RTEXITCODE_SUCCESS)
3595 rcExit = FpuLdR32DumpAll(papszNameFmts);
3596 return rcExit;
3597}
3598#endif
3599
/** Runs all three fld-from-memory test sets (80-, 64- and 32-bit sources). */
static void FpuLdMemTest(void)
{
    FpuLdR80Test();
    FpuLdR64Test();
    FpuLdR32Test();
}
3606
3607
3608/*
3609 * Load integer values from memory.
3610 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits FpuLdI<bits>Generate() and the dump function for a fild-from-memory
 * worker table; same scheme as GEN_FPU_LOAD but with integer inputs.
 * (a_szFmtIn is unused here; it is consumed by TEST_FPU_LOAD_INT.)
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static RTEXITCODE FpuLdI ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                a_TestType const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result }; \
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuLdI ## a_cBits, a_aSubTests)
#else
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
3644
/**
 * Instantiates typedefs, worker table, optional generator (GEN_FPU_LOAD_INT)
 * and the FpuLdI<bits>Test() driver for one fild-from-memory variant.
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}

/* Instantiate the 64-, 32- and 16-bit integer load tests. */
TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
3700
3701#ifdef TSTIEMAIMPL_WITH_GENERATOR
3702static RTEXITCODE FpuLdIntGenerate(uint32_t cTests, const char * const *papszNameFmts)
3703{
3704 RTEXITCODE rcExit = FpuLdI64Generate(cTests, papszNameFmts);
3705 if (rcExit == RTEXITCODE_SUCCESS)
3706 rcExit = FpuLdI32Generate(cTests, papszNameFmts);
3707 if (rcExit == RTEXITCODE_SUCCESS)
3708 rcExit = FpuLdI16Generate(cTests, papszNameFmts);
3709 return rcExit;
3710}
3711
3712static RTEXITCODE FpuLdIntDumpAll(const char * const *papszNameFmts)
3713{
3714 RTEXITCODE rcExit = FpuLdI64DumpAll(papszNameFmts);
3715 if (rcExit == RTEXITCODE_SUCCESS)
3716 rcExit = FpuLdI32DumpAll(papszNameFmts);
3717 if (rcExit == RTEXITCODE_SUCCESS)
3718 rcExit = FpuLdI16DumpAll(papszNameFmts);
3719 return rcExit;
3720}
3721#endif
3722
/** Runs all three fild-from-memory test sets (64-, 32- and 16-bit sources). */
static void FpuLdIntTest(void)
{
    FpuLdI64Test();
    FpuLdI32Test();
    FpuLdI16Test();
}
3729
3730
3731/*
3732 * Load binary coded decimal values from memory.
3733 */
/** Worker signature for loading a packed BCD (80-bit) value into ST(0). */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);

/** The packed-BCD load worker(s) under test (fbld). */
static FPU_LD_D80_T g_aFpuLdD80[] =
{
    ENTRY_BIN(fld_r80_from_d80)
};
3742
3743#ifdef TSTIEMAIMPL_WITH_GENERATOR
3744static RTEXITCODE FpuLdD80Generate(uint32_t cTests, const char * const *papszNameFmts)
3745{
3746 X86FXSTATE State;
3747 RT_ZERO(State);
3748 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
3749 {
3750 IEMBINARYOUTPUT BinOut;
3751 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuLdD80[iFn]), RTEXITCODE_FAILURE);
3752 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3753 {
3754 State.FCW = RandFcw();
3755 State.FSW = RandFsw();
3756 RTPBCD80U InVal = RandD80Src(iTest);
3757
3758 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3759 {
3760 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3761 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
3762 g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
3763 FPU_D80_IN_TEST_T const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal };
3764 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
3765 }
3766 }
3767 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
3768 }
3769 return RTEXITCODE_SUCCESS;
3770}
3771DUMP_ALL_FN(FpuLdD80, g_aFpuLdD80)
3772#endif
3773
/**
 * Runs the recorded packed BCD load tests, comparing each worker's FSW and
 * ST(0) result against the stored expectations.
 */
static void FpuLdD80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdD80[iFn]))
            continue;

        FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
        uint32_t const cTests = g_aFpuLdD80[iFn].cTests;
        PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTPBCD80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                pfn(&State, &Res, &InVal);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatD80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
                                 FormatFcw(paTests[iTest].fFcw) );
            }
            /* Second variation (if present) runs the native worker. */
            pfn = g_aFpuLdD80[iFn].pfnNative;
        }

        FREE_DECOMPRESSED_TESTS(g_aFpuLdD80[iFn]);
    }
}
3816
3817
3818/*
3819 * Store values floating point values to memory.
3820 */
3821#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked boundary inputs for fst_r80_to_r32 (rounding edge cases). */
static const RTFLOAT80U g_aFpuStR32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
};
/** Hand-picked boundary inputs for fst_r80_to_r64 (rounding edge cases). */
static const RTFLOAT80U g_aFpuStR64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
};
/** Specials for fst_r80_to_r80 (no narrowing, so only a placeholder entry). */
static const RTFLOAT80U g_aFpuStR80Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
};
/**
 * Emits FpuStR<bits>Generate() and the dump function for a fst-to-memory
 * worker table.  Appends the g_aFpuStR<bits>Specials entries after the random
 * inputs and iterates rounding control and the OM/UM/PM exception-mask bits.
 *
 * Fix: dropped the dead RT_ZERO(OutVal) - the following 0xfe memset fully
 * overwrites the same object, so the zeroing was a dead store.
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static RTEXITCODE FpuStR ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
                                   : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    /* Poison the output so bytes the worker doesn't write stand out. */ \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    a_TestType const Test = { State.FCW, State.FSW, uFswOut, InVal, OutVal }; \
                    GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
                } \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuStR ## a_cBits, a_aSubTests)
3883#else
3884# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
3885#endif
3886
3887#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
3888typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
3889 PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
3890typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
3891TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
3892\
3893static a_SubTestType a_aSubTests[] = \
3894{ \
3895 ENTRY_BIN(RT_CONCAT(fst_r80_to_r,a_cBits)) \
3896}; \
3897GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
3898\
3899static void FpuStR ## a_cBits ## Test(void) \
3900{ \
3901 X86FXSTATE State; \
3902 RT_ZERO(State); \
3903 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3904 { \
3905 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3906 continue; \
3907 \
3908 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3909 uint32_t const cTests = a_aSubTests[iFn].cTests; \
3910 PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3911 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3912 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3913 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3914 { \
3915 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3916 { \
3917 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3918 uint16_t uFswOut = 0; \
3919 a_rdType OutVal; \
3920 RT_ZERO(OutVal); \
3921 memset(&OutVal, 0xfe, sizeof(OutVal)); \
3922 State.FCW = paTests[iTest].fFcw; \
3923 State.FSW = paTests[iTest].fFswIn; \
3924 pfn(&State, &uFswOut, &OutVal, &InVal); \
3925 if ( uFswOut != paTests[iTest].fFswOut \
3926 || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
3927 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3928 "%s -> fsw=%#06x %s\n" \
3929 "%s expected %#06x %s%s%s (%s)\n", \
3930 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3931 FormatR80(&paTests[iTest].InVal), \
3932 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
3933 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
3934 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3935 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
3936 FormatFcw(paTests[iTest].fFcw) ); \
3937 } \
3938 pfn = a_aSubTests[iFn].pfnNative; \
3939 } \
3940 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
3941 } \
3942}
3943
3944TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
3945TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
3946TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
3947
3948#ifdef TSTIEMAIMPL_WITH_GENERATOR
3949static RTEXITCODE FpuStMemGenerate(uint32_t cTests, const char * const *papszNameFmts)
3950{
3951 RTEXITCODE rcExit = FpuStR80Generate(cTests, papszNameFmts);
3952 if (rcExit == RTEXITCODE_SUCCESS)
3953 rcExit = FpuStR64Generate(cTests, papszNameFmts);
3954 if (rcExit == RTEXITCODE_SUCCESS)
3955 rcExit = FpuStR32Generate(cTests, papszNameFmts);
3956 return rcExit;
3957}
3958
3959static RTEXITCODE FpuStMemDumpAll(const char * const *papszNameFmts)
3960{
3961 RTEXITCODE rcExit = FpuStR80DumpAll(papszNameFmts);
3962 if (rcExit == RTEXITCODE_SUCCESS)
3963 rcExit = FpuStR64DumpAll(papszNameFmts);
3964 if (rcExit == RTEXITCODE_SUCCESS)
3965 rcExit = FpuStR32DumpAll(papszNameFmts);
3966 return rcExit;
3967}
3968#endif
3969
/** Runs all three fst-to-memory test sets (80-, 64- and 32-bit outputs). */
static void FpuStMemTest(void)
{
    FpuStR80Test();
    FpuStR64Test();
    FpuStR32Test();
}
3976
3977
3978/*
3979 * Store integer values to memory or register.
3980 */
/** Subtest descriptor types for the fist/fistt store workers. */
TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);

/** 16-bit store workers; fistt has separate AMD and Intel flavours here. */
static FPU_ST_I16_T g_aFpuStI16[] =
{
    ENTRY_BIN(fist_r80_to_i16),
    ENTRY_BIN_AMD( fistt_r80_to_i16, 0),
    ENTRY_BIN_INTEL(fistt_r80_to_i16, 0),
};
/** 32-bit store workers. */
static FPU_ST_I32_T g_aFpuStI32[] =
{
    ENTRY_BIN(fist_r80_to_i32),
    ENTRY_BIN(fistt_r80_to_i32),
};
/** 64-bit store workers. */
static FPU_ST_I64_T g_aFpuStI64[] =
{
    ENTRY_BIN(fist_r80_to_i64),
    ENTRY_BIN(fistt_r80_to_i64),
};
4001
4002#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked boundary inputs around the int16 range for fist/fistt. */
static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
/** Hand-picked boundary inputs around the int32 range for fist/fistt. */
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
/** Hand-picked boundary inputs around the int64 range for fist/fistt. */
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};
4092
/**
 * Emits FpuStI<bits>Generate() and the dump function for a fist/fistt store
 * worker table.  Prefers the native worker when available, skips entries whose
 * EFLAGS flavour doesn't match the host, appends the g_aFpuStI<bits>Specials
 * after the random inputs, and iterates rounding control plus the OM/UM/PM
 * exception-mask bits.  (a_szFmt is unused here; TEST_FPU_STORE_INT uses it.)
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static RTEXITCODE FpuStI ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType iOutVal = ~(a_iType)2; \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    a_TestType const Test = { State.FCW, State.FSW, uFswOut, InVal, iOutVal }; \
                    GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
                } \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuStI ## a_cBits, a_aSubTests)
4139#else
4140# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
4141#endif
4142
/**
 * Instantiates the test machinery for one fist/fistt store-to-integer width.
 *
 * Expands to the data generator (GEN_FPU_STORE_INT; empty unless
 * TSTIEMAIMPL_WITH_GENERATOR is defined) plus a FpuStI<a_cBits>Test()
 * function that replays each recorded FCW/FSW/r80-input triplet and checks
 * both the produced FSW and the integer result against the recorded values.
 * On the second variation pass (when COUNT_VARIATIONS yields more than one)
 * the native implementation (pfnNative) is exercised instead of pfn.
 */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal   = paTests[iTest].InVal; \
                uint16_t         uFswOut = 0; \
                a_iType          iOutVal = ~(a_iType)2; /* poison value so a no-write is detected */ \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x " a_szFmt "\n" \
                                          "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, iOutVal, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; /* 2nd pass uses the native worker, if any */ \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
4187
//fistt_r80_to_i16 diffs for AMD, of course :-)

/* Instantiate generator + test driver for the 64-, 32- and 16-bit integer store widths. */
TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
4193
4194#ifdef TSTIEMAIMPL_WITH_GENERATOR
4195static RTEXITCODE FpuStIntGenerate(uint32_t cTests, const char * const *papszNameFmts)
4196{
4197 RTEXITCODE rcExit = FpuStI64Generate(cTests, papszNameFmts);
4198 if (rcExit == RTEXITCODE_SUCCESS)
4199 rcExit = FpuStI32Generate(cTests, papszNameFmts);
4200 if (rcExit == RTEXITCODE_SUCCESS)
4201 rcExit = FpuStI16Generate(cTests, papszNameFmts);
4202 return rcExit;
4203}
4204static RTEXITCODE FpuStIntDumpAll(const char * const *papszNameFmts)
4205{
4206 RTEXITCODE rcExit = FpuStI64DumpAll(papszNameFmts);
4207 if (rcExit == RTEXITCODE_SUCCESS)
4208 rcExit = FpuStI32DumpAll(papszNameFmts);
4209 if (rcExit == RTEXITCODE_SUCCESS)
4210 rcExit = FpuStI16DumpAll(papszNameFmts);
4211 return rcExit;
4212}
4213#endif
4214
4215static void FpuStIntTest(void)
4216{
4217 FpuStI64Test();
4218 FpuStI32Test();
4219 FpuStI16Test();
4220}
4221
4222
4223/*
4224 * Store as packed BCD value (memory).
4225 */
4226typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
4227typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
4228TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
4229
4230static FPU_ST_D80_T g_aFpuStD80[] =
4231{
4232 ENTRY_BIN(fst_r80_to_d80),
4233};
4234
4235#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates binary test data for fst_r80_to_d80 (fbstp).
 *
 * Feeds random r80 inputs plus boundary specials clustered around the packed
 * BCD representable range (18 decimal digits, mantissa 0xde0b6b3a763ffff0 at
 * exponent bias+59) through the worker for all four rounding modes and a
 * sweep of the OM/UM/PM exception-mask bit combinations.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random test values to generate.
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE FpuStD80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuStD80[iFn]), RTEXITCODE_FAILURE);
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t  uFswOut = 0;
                    RTPBCD80U OutVal  = RTPBCD80U_INIT_ZERO(0);
                    /* Clear RC and the three mask bits we vary, then set them from the loop counters. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    FPU_ST_D80_TEST_T const Test = { State.FCW, State.FSW, uFswOut, InVal, OutVal };
                    GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuStD80, g_aFpuStD80)
4289#endif
4290
4291
4292static void FpuStD80Test(void)
4293{
4294 X86FXSTATE State;
4295 RT_ZERO(State);
4296 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
4297 {
4298 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuStD80[iFn]))
4299 continue;
4300
4301 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
4302 uint32_t const cTests = g_aFpuStD80[iFn].cTests;
4303 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
4304 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
4305 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4306 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4307 {
4308 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4309 {
4310 RTFLOAT80U const InVal = paTests[iTest].InVal;
4311 uint16_t uFswOut = 0;
4312 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
4313 State.FCW = paTests[iTest].fFcw;
4314 State.FSW = paTests[iTest].fFswIn;
4315 pfn(&State, &uFswOut, &OutVal, &InVal);
4316 if ( uFswOut != paTests[iTest].fFswOut
4317 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
4318 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4319 "%s -> fsw=%#06x %s\n"
4320 "%s expected %#06x %s%s%s (%s)\n",
4321 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4322 FormatR80(&paTests[iTest].InVal),
4323 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
4324 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
4325 FswDiff(uFswOut, paTests[iTest].fFswOut),
4326 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
4327 FormatFcw(paTests[iTest].fFcw) );
4328 }
4329 pfn = g_aFpuStD80[iFn].pfnNative;
4330 }
4331
4332 FREE_DECOMPRESSED_TESTS(g_aFpuStD80[iFn]);
4333 }
4334}
4335
4336
4337
4338/*********************************************************************************************************************************
4339* x87 FPU Binary Operations *
4340*********************************************************************************************************************************/
4341
4342/*
4343 * Binary FPU operations on two 80-bit floating point values.
4344 */
4345TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
4346enum { kFpuBinaryHint_fprem = 1, };
4347
4348static FPU_BINARY_R80_T g_aFpuBinaryR80[] =
4349{
4350 ENTRY_BIN(fadd_r80_by_r80),
4351 ENTRY_BIN(fsub_r80_by_r80),
4352 ENTRY_BIN(fsubr_r80_by_r80),
4353 ENTRY_BIN(fmul_r80_by_r80),
4354 ENTRY_BIN(fdiv_r80_by_r80),
4355 ENTRY_BIN(fdivr_r80_by_r80),
4356 ENTRY_BIN_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
4357 ENTRY_BIN_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
4358 ENTRY_BIN(fscale_r80_by_r80),
4359 ENTRY_BIN_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
4360 ENTRY_BIN_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
4361 ENTRY_BIN_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
4362 ENTRY_BIN_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
4363 ENTRY_BIN_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
4364 ENTRY_BIN_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
4365};
4366
4367#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates binary test data for the r80-by-r80 binary FPU workers.
 *
 * For each input pair it records results for all rounding/precision control
 * combinations, first with all exceptions masked, then unmasked, then with
 * progressively adjusted masks for any exceptions that actually triggered.
 * For fprem/fprem1 entries (kFpuBinaryHint_fprem) it additionally follows the
 * partial-remainder sequence (C2 set) until completion or an exception.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random test value pairs (raised to at
 *                          least 192; 144 standard input variations exist).
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE FpuBinaryR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
    {
        {   RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
            RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        {   RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
            RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        {   RTFLOAT80U_INIT_ZERO(0), /* minus variant */
            RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        {   RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
            RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
            RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
            RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
           once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
            RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
            RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
            RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
        {   RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
            RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
        /* fscale: Negative variants for the essentials of the above. */
        {   RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
            RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
        {   RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
            RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
        {   RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
            RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
        {   RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
            RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
        /* fscale: Some fun with denormals and pseudo-denormals. */
        {   RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
            RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
        {   RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
            RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
        {   RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
        {   RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
            RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        {   RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
            RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs       = (cTests - 144) / 4;
    uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
        if (   g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuBinaryR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputPairs  = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
            RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
            bool fTargetRange = false;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
            {
                cNormalInputPairs++;
                if (   g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
                    && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
                    cTargetRangeInputs += fTargetRange = true;
                else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
                    if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
                    {   /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
                        InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
                        InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
                        cTargetRangeInputs += fTargetRange = true;
                    }
            }
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Not enough normal pairs yet and we're running out of tests: redo this iteration with new randoms. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcwExtra = 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* First with all exceptions masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding  << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal1, &InVal2);
                    FPU_BINARY_R80_TEST_T const TestM
                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal1, InVal2, ResM.r80Result };
                    GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));

                    /* Then with all exceptions unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal1, &InVal2);
                    FPU_BINARY_R80_TEST_T const TestU
                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal1, InVal2, ResU.r80Result };
                    GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));

                    /* Then with only the exceptions that actually triggered masked, and variations thereof. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal1, &InVal2);
                        FPU_BINARY_R80_TEST_T const Test1
                            = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal1, InVal2, Res1.r80Result };
                        GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));

                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal1, &InVal2);
                            FPU_BINARY_R80_TEST_T const Test2
                                = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal1, InVal2, Res2.r80Result };
                            GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
                        }
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal1, &InVal2);
                                    FPU_BINARY_R80_TEST_T const Test3
                                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal1, InVal2, Res3.r80Result };
                                    GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
                                }
                    }

                    /* If the values are in range and caused no exceptions, do the whole series of
                       partial reminders till we get the non-partial one or run into an exception. */
                    if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
                    {
                        IEMFPURESULT ResPrev = ResM;
                        for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
                        {
                            State.FCW = State.FCW | X86_FCW_MASK_ALL;
                            State.FSW = ResPrev.FSW;
                            IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
                            FPU_BINARY_R80_TEST_T const TestSeq
                                = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResSeq.FSW, ResPrev.r80Result, InVal2, ResSeq.r80Result };
                            GenerateBinaryWrite(&BinOut, &TestSeq, sizeof(TestSeq));
                            ResPrev = ResSeq;
                        }
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuBinaryR80, g_aFpuBinaryR80)
4564#endif
4565
4566
/**
 * Tests the r80-by-r80 binary FPU workers against the recorded test data.
 *
 * Replays each recorded FCW/FSW/input-pair combination and verifies that both
 * the resulting FSW and the r80 result are bit-identical to the recorded
 * expectations.  On the second variation pass the native implementation is
 * exercised.
 */
static void FpuBinaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryR80[iFn]))
            continue;

        FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
        uint32_t const                      cTests  = g_aFpuBinaryR80[iFn].cTests;
        PFNIEMAIMPLFPUR80                   pfn     = g_aFpuBinaryR80[iFn].pfn;
        uint32_t const                      cVars   = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
                RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
                IEMFPURESULT     Res    = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal1, &InVal2);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 FormatFcw(paTests[iTest].fFcw) );
            }
            pfn = g_aFpuBinaryR80[iFn].pfnNative; /* 2nd pass uses the native worker, if any */
        }

        FREE_DECOMPRESSED_TESTS(g_aFpuBinaryR80[iFn]);
    }
}
4610
4611
4612/*
4613 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
4614 */
4615#define int64_t_IS_NORMAL(a) 1
4616#define int32_t_IS_NORMAL(a) 1
4617#define int16_t_IS_NORMAL(a) 1
4618
4619#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked special input pairs for the r80-by-<type2> binary operation
   generators; one table per second-operand type, indexed past the random
   inputs (iTest >= cTests). */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
4638
/**
 * Emits the test data generator for r80-by-<a_Type2> binary operations
 * (FpuBinary<a_UpBits>Generate) plus its dump-all helper.
 *
 * Mixes random inputs with the s_aFpuBinary<a_UpBits>Specials table and, for
 * each pair, records results for all rounding/precision combinations with the
 * exception mask fully set and fully cleared.  Non-normal pairs are re-rolled
 * near the end of the run until at least cMinNormalPairs normal pairs exist.
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static RTEXITCODE FpuBinary ## a_UpBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                iTest -= 1; /* re-roll this iteration with new random inputs */ \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding  << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        a_TestType const Test = { State.FCW, State.FSW, Res.FSW, InVal1, InVal2, Res.r80Result }; \
                        GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
                    } \
                } \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuBinary ## a_UpBits, a_aSubTests)
4692#else
4693# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
4694#endif
4695
/**
 * Instantiates subtest table, generator and test driver for the six r80-by-
 * <a_Type2> binary operations (add/mul/sub/subr/div/divr, with optional 'i'
 * infix for the integer variants).
 *
 * FpuBinary<a_UpBits>Test() replays the recorded FCW/FSW/input pairs and
 * verifies both the resulting FSW and the r80 result; the second variation
 * pass exercises the native implementation.
 */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    ENTRY_BIN(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY_BIN(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY_BIN(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY_BIN(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY_BIN(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY_BIN(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const    InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT     Res    = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; /* 2nd pass uses the native worker, if any */ \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
4753
/* Instantiate the r80-by-<type2> binary operation tests for each second-operand
   type: (fIntType, cBits, lo-bits infix, up-bits infix, insn infix, type2,
   subtest type, subtest table, test record type). */
TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
4758
4759
4760/*
4761 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
4762 */
4763#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked special input pairs for the FSW-only binary operation generators
   (compares etc.); one table per second-operand type, indexed past the random
   inputs (iTest >= cTests). */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
4787
/**
 * Emits the test data generator for FSW-only r80-by-<a_Type2> binary
 * operations (FpuBinaryFsw<a_UpBits>Generate) plus its dump-all helper.
 *
 * Unlike GEN_FPU_BINARY_SMALL, the worker only produces an FSW value, and
 * rounding/precision are not varied - only the exception mask is flipped.
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static RTEXITCODE FpuBinaryFsw ## a_UpBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        IEMBINARYOUTPUT BinOut; \
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                iTest -= 1; /* re-roll this iteration with new random inputs */ \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
            { \
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
                uint16_t fFswOut = 0; \
                a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
                a_TestType const Test = { State.FCW, State.FSW, fFswOut, InVal1, InVal2 }; \
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
            } \
        } \
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
    } \
    return RTEXITCODE_SUCCESS; \
} \
DUMP_ALL_FN(FpuBinaryFsw ## a_UpBits, a_aSubTests)
4833#else
4834# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
4835#endif
4836
/* Emits the subtest table, the optional generator (see GEN_FPU_BINARY_FSW) and
   FpuBinaryFsw<Type>Test() for one FSW-only binary operation family.  The test
   function replays the recorded inputs against pfn, then (second variation)
   against pfnNative, comparing only the output FSW word.
   NOTE: comments inside the macro body are avoided on purpose, see above. */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
            continue; \
        \
        a_TestType const * const   paTests = a_aSubTests[iFn].paTests; \
        uint32_t const             cTests  = a_aSubTests[iFn].cTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const             cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t         fFswOut = 0; \
                RTFLOAT80U const InVal1  = paTests[iTest].InVal1; \
                a_Type2 const    InVal2  = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? "  " : "", fFswOut, \
                                 iVar ? "  " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
        FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
    } \
}
4886
/* Instantiate the FSW-only binary test sets for each second-operand type. */
TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY_BIN(fcom_r80_by_r80), ENTRY_BIN(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY_BIN(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY_BIN(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(1, 32, I32, int32_t,    FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY_BIN(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(1, 16, I16, int16_t,    FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY_BIN(ficom_r80_by_i16))
4892
4893
4894/*
4895 * Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
4896 */
/* Subtest descriptor type and table for the r80-by-r80 comparisons that
   report their result in EFLAGS (and possibly FSW): fcomi/fucomi. */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

static FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY_BIN(fcomi_r80_by_r80),
    ENTRY_BIN(fucomi_r80_by_r80),
};
4904
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked input pair(s) appended after the random inputs for fcomi/fucomi. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    {   RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
4911
/**
 * Generates binary test data for the EFLAGS-producing r80-by-r80 comparisons
 * (fcomi/fucomi).
 *
 * Random input pairs come first, the s_aFpuBinaryEflR80Specials entries last.
 * Each pair is exercised twice: once with all FPU exceptions masked and once
 * with all unmasked (the iMask loop runs exactly for 0 and X86_FCW_MASK_ALL).
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE (on output open/close error).
 * @param   cTests          Number of random inputs; raised to at least 160 to
 *                          cover the 144 standard input variations.
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE FpuBinaryEflR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuBinaryEflR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
        {
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few normal/normal pairs and we're running out of slots: re-roll this one. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
            {
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
                uint16_t uFswOut = 0;
                uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
                FPU_BINARY_EFL_R80_TEST_T const Test = { State.FCW, State.FSW, uFswOut, InVal1, InVal2, fEflOut, };
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
DUMP_ALL_FN(FpuBinaryEflR80, g_aFpuBinaryEflR80)
#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
4955
/**
 * Replays the recorded fcomi/fucomi test data, checking both the returned
 * EFLAGS and the output FSW against the expected values.
 *
 * Runs each enabled subtest first with the generic pfn implementation, then
 * (second variation, "/n" in failure messages) with pfnNative when present.
 */
static void FpuBinaryEflR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryEflR80[iFn]))
            continue;

        FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
        uint32_t const                          cTests  = g_aFpuBinaryEflR80[iFn].cTests;
        PFNIEMAIMPLFPUR80EFL                    pfn     = g_aFpuBinaryEflR80[iFn].pfn;
        uint32_t const                          cVars   = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
                RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                uint16_t uFswOut = 0;
                uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
                if (   uFswOut != paTests[iTest].fFswOut
                    || fEflOut != paTests[iTest].fEflOut)
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
                                          "%s -> fsw=%#06x efl=%#08x\n"
                                          "%s expected %#06x %#08x %s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
                                 iVar ? "  " : "", uFswOut, fEflOut,
                                 iVar ? "  " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
                                 FormatFcw(paTests[iTest].fFcw));
            }
            pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
        }

        FREE_DECOMPRESSED_TESTS(g_aFpuBinaryEflR80[iFn]);
    }
}
4998
4999
5000/*********************************************************************************************************************************
5001* x87 FPU Unary Operations *
5002*********************************************************************************************************************************/
5003
5004/*
5005 * Unary FPU operations on one 80-bit floating point value.
5006 *
5007 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
5008 * a rounding error or not.
5009 */
/* Subtest descriptor type for unary r80 operations; uExtra carries one of the
   kUnary_* accuracy classifications below. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);

/* Accuracy classification used by the generator to decide when a one-ULP
   rounding difference is acceptable (see FpuUnaryR80MayHaveRoundingError). */
enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
static FPU_UNARY_R80_T g_aFpuUnaryR80[] =
{
    ENTRY_BIN_EX(      fabs_r80,    kUnary_Accurate),
    ENTRY_BIN_EX(      fchs_r80,    kUnary_Accurate),
    ENTRY_BIN_AMD_EX(  f2xm1_r80, 0, kUnary_Accurate),     // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
    ENTRY_BIN_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
    ENTRY_BIN_EX(      fsqrt_r80,   kUnary_Accurate),
    ENTRY_BIN_EX(      frndint_r80, kUnary_Accurate),
    ENTRY_BIN_AMD_EX(  fsin_r80, 0, kUnary_Accurate_Trigonometry),  // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
    ENTRY_BIN_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
    ENTRY_BIN_AMD_EX(  fcos_r80, 0, kUnary_Accurate_Trigonometry),  // value & C1 differences
    ENTRY_BIN_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
};
5026
5027#ifdef TSTIEMAIMPL_WITH_GENERATOR
5028
5029static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
5030{
5031 if ( enmKind == kUnary_Rounding_F2xm1
5032 && RTFLOAT80U_IS_NORMAL(pr80Val)
5033 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
5034 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
5035 return true;
5036 return false;
5037}
5038
5039DUMP_ALL_FN(FpuUnaryR80, g_aFpuUnaryR80)
/**
 * Generates binary test data for the unary r80 operations in g_aFpuUnaryR80.
 *
 * For every input value, tests are recorded for all 4 rounding modes times all
 * 4 precision modes, each with exceptions masked and unmasked, and then with
 * progressively adjusted exception masks until the raised-exception set is
 * stable (plus one record per single unmasked exception bit when several were
 * raised).  The reserved FCW bit 7 (0x80) in the recorded FCW marks tests
 * where a rounding error is acceptable (see FpuUnaryR80MayHaveRoundingError).
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random inputs per subtest.
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE FpuUnaryR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    /* Fixed inputs appended after the random ones; mostly f2xm1 domain edges
       and (pseudo-)denormal-range values. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1),  /* 0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1),  /* -0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS),      /* 1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS),      /* -1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0),                        /* +1.0^-16382 */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0),                        /* -1.0^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0),                        /* +1.1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0),                        /* -1.1^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0),                        /* +1.1xxx1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0),                        /* -1.1xxx1^-16382 */
    };
    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        /* Data is generated by the native (reference) implementation when available. */
        PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
        if (   g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputs      = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
            {
                if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
                {
                    /* For f2xm1, steer the tail of the normal inputs into the
                       interesting 2^0..2^-69 exponent window. */
                    unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
                                        ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
                    unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
                    if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
                        cTargetRangeInputs++;
                    else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
                    {
                        InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
                        cTargetRangeInputs++;
                    }
                }
                cNormalInputs++;
            }
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Too few normals and we're running out of slots: re-roll this one. */
                iTest -= 1;
                continue;
            }

            /* Reserved FCW bit 7 flags tests where a rounding error is tolerated. */
            uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* 1) All exceptions masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding  << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal);
                    FPU_UNARY_R80_TEST_T const TestM
                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal, ResM.r80Result };
                    GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));

                    /* 2) All exceptions unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal);
                    FPU_UNARY_R80_TEST_T const TestU
                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal, ResU.r80Result };
                    GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));

                    /* 3) Mask exactly the exceptions that were raised; iterate once more
                          if new exceptions surface, then try each bit unmasked alone. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal);
                        FPU_UNARY_R80_TEST_T const Test1
                            = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal, Res1.r80Result };
                        GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal);
                            FPU_UNARY_R80_TEST_T const Test2
                                = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal, Res2.r80Result };
                            GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
                        }
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal);
                                    FPU_UNARY_R80_TEST_T const Test3
                                        = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal, Res3.r80Result };
                                    GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
                                }
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
5156#endif
5157
5158static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
5159{
5160 if (fFcw1 == fFcw2)
5161 return true;
5162 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
5163 {
5164 *pfRndErr = true;
5165 return true;
5166 }
5167 return false;
5168}
5169
5170static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
5171{
5172 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
5173 return true;
5174 if ( fRndErrOk
5175 && pr80Val1->s.fSign == pr80Val2->s.fSign)
5176 {
5177 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
5178 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
5179 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
5180 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
5181 ||
5182 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
5183 && pr80Val1->s.uMantissa == UINT64_MAX
5184 && pr80Val2->s.uMantissa == RT_BIT_64(63))
5185 ||
5186 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
5187 && pr80Val2->s.uMantissa == UINT64_MAX
5188 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
5189 {
5190 *pfRndErr = true;
5191 return true;
5192 }
5193 }
5194 return false;
5195}
5196
5197
/**
 * Replays the recorded unary r80 test data, checking the output FSW and the
 * 80-bit result.
 *
 * The reserved FCW bit 7 (0x80) in the recorded FCW marks tests where a
 * one-ULP rounding difference (value and/or C1) is accepted; the bit is
 * stripped before loading the FCW.  A summary of tolerated rounding errors is
 * printed per subtest.
 */
static void FpuUnaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryR80[iFn]))
            continue;

        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
        uint32_t const                     cTests  = g_aFpuUnaryR80[iFn].cTests;
        PFNIEMAIMPLFPUR80UNARY             pfn     = g_aFpuUnaryR80[iFn].pfn;
        uint32_t const                     cVars   = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
        uint32_t                           cRndErrs         = 0;
        uint32_t                           cPossibleRndErrs = 0;
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                IEMFPURESULT     Res   = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                bool const       fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80); /* bit 7 = rounding error tolerated */
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                bool fRndErr = false;
                if (   !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
                    || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x    %s\n"
                                          "%s expected %#06x    %s%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
                cRndErrs         += fRndErr;
                cPossibleRndErrs += fRndErrOk;
            }
            pfn = g_aFpuUnaryR80[iFn].pfnNative;
        }
        if (cPossibleRndErrs > 0)
            RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
        FREE_DECOMPRESSED_TESTS(g_aFpuUnaryR80[iFn]);
    }
}
5247
5248
5249/*
5250 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
5251 */
/* Subtest descriptor type and table for unary r80 operations that only modify
   FSW; uExtra == 1 flags fxam (needs FTW/empty-register handling). */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);

static FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
{
    ENTRY_BIN(ftst_r80),
    ENTRY_BIN_EX(fxam_r80, 1),
};
5259
5260#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates binary test data for the FSW-only unary operations (ftst, fxam).
 *
 * For ftst every input is run over all rounding/precision combinations with
 * exceptions masked and unmasked.  fxam is exception-free, so it gets a single
 * record per input; the otherwise-unused (MBZ) FCW bit 7 in the recorded FCW
 * encodes whether the register was tagged empty in FTW.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random inputs per subtest.
 * @param   papszNameFmts   Output filename format strings.
 */
static RTEXITCODE FpuUnaryFswR80Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),  /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
        PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        if (   g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;
        State.FTW = 0;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryFswR80[iFn]), RTEXITCODE_FAILURE);
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Too few normals and we're running out of slots: re-roll this one. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding  << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            FPU_UNARY_R80_TEST_T const Test = { State.FCW, State.FSW, fFswOut, InVal };
                            GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
                        }
                    }
                }
            }
            else
            {
                uint16_t fFswOut = 0;
                uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                FPU_UNARY_R80_TEST_T const Test = { (uint16_t)(fFcw | fEmpty), State.FSW, fFswOut, InVal };
                GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }
    return RTEXITCODE_SUCCESS;
}
5331DUMP_ALL_FN(FpuUnaryFswR80, g_aFpuUnaryFswR80)
5332#endif
5333
5334
/**
 * Replays the recorded ftst/fxam test data, checking the output FSW.
 *
 * The recorded FCW bit 7 is the generator's empty-register marker (fxam only);
 * it is stripped from the FCW and translated back into an FTW tag here.
 */
static void FpuUnaryFswR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryFswR80[iFn]))
            continue;

        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
        uint32_t const                     cTests  = g_aFpuUnaryFswR80[iFn].cTests;
        PFNIEMAIMPLFPUR80UNARYFSW          pfn     = g_aFpuUnaryFswR80[iFn].pfn;
        uint32_t const                     cVars   = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                uint16_t fFswOut = 0;
                State.FSW = paTests[iTest].fFswIn;
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
                State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
                pfn(&State, &fFswOut, &InVal);
                if (fFswOut != paTests[iTest].fFswOut)
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x\n"
                                          "%s expected %#06x %s (%s%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? "  " : "", fFswOut,
                                 iVar ? "  " : "", paTests[iTest].fFswOut,
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
                                 paTests[iTest].fFcw & 0x80 ? " empty" : "");
            }
            pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
        }

        FREE_DECOMPRESSED_TESTS(g_aFpuUnaryFswR80[iFn]);
    }
}
5376
5377/*
5378 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
5379 */
/* Subtest descriptor type and table for unary r80 operations producing two
   80-bit results (fxtract, fptan, fsincos). */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);

static FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
{
    ENTRY_BIN(fxtract_r80_r80),
    ENTRY_BIN_AMD(  fptan_r80_r80, 0),   // rounding differences
    ENTRY_BIN_INTEL(fptan_r80_r80, 0),
    ENTRY_BIN_AMD(  fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
    ENTRY_BIN_INTEL(fsincos_r80_r80, 0),
};
5390
5391#ifdef TSTIEMAIMPL_WITH_GENERATOR
5392static RTEXITCODE FpuUnaryTwoR80Generate(uint32_t cTests, const char * const *papszNameFmts)
5393{
5394 static RTFLOAT80U const s_aSpecials[] =
5395 {
5396 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
5397 };
5398
5399 X86FXSTATE State;
5400 RT_ZERO(State);
5401 uint32_t cMinNormals = cTests / 4;
5402 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
5403 {
5404 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
5405 if ( g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
5406 && g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
5407 continue;
5408
5409 IEMBINARYOUTPUT BinOut;
5410 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryTwoR80[iFn]), RTEXITCODE_FAILURE);
5411 uint32_t cNormalInputs = 0;
5412 uint32_t cTargetRangeInputs = 0;
5413 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5414 {
5415 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
5416 if (RTFLOAT80U_IS_NORMAL(&InVal))
5417 {
5418 if (iFn != 0)
5419 {
5420 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
5421 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
5422 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
5423 cTargetRangeInputs++;
5424 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
5425 {
5426 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
5427 cTargetRangeInputs++;
5428 }
5429 }
5430 cNormalInputs++;
5431 }
5432 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
5433 {
5434 iTest -= 1;
5435 continue;
5436 }
5437
5438 uint16_t const fFcwExtra = 0; /* for rounding error indication */
5439 uint16_t const fFcw = RandFcw();
5440 State.FSW = RandFsw();
5441
5442 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5443 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
5444 {
5445 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
5446 | (iRounding << X86_FCW_RC_SHIFT)
5447 | (iPrecision << X86_FCW_PC_SHIFT)
5448 | X86_FCW_MASK_ALL;
5449 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5450 pfn(&State, &ResM, &InVal);
5451 FPU_UNARY_TWO_R80_TEST_T const TestM
5452 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal, ResM.r80Result1, ResM.r80Result2 };
5453 GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));
5454
5455 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
5456 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5457 pfn(&State, &ResU, &InVal);
5458 FPU_UNARY_TWO_R80_TEST_T const TestU
5459 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal, ResU.r80Result1, ResU.r80Result2 };
5460 GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));
5461
5462 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
5463 if (fXcpt)
5464 {
5465 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
5466 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5467 pfn(&State, &Res1, &InVal);
5468 FPU_UNARY_TWO_R80_TEST_T const Test1
5469 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal, Res1.r80Result1, Res1.r80Result2 };
5470 GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));
5471
5472 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
5473 {
5474 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
5475 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
5476 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5477 pfn(&State, &Res2, &InVal);
5478 FPU_UNARY_TWO_R80_TEST_T const Test2
5479 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal, Res2.r80Result1, Res2.r80Result2 };
5480 GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
5481 }
5482 if (!RT_IS_POWER_OF_TWO(fXcpt))
5483 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
5484 if (fUnmasked & fXcpt)
5485 {
5486 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
5487 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5488 pfn(&State, &Res3, &InVal);
5489 FPU_UNARY_TWO_R80_TEST_T const Test3
5490 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal, Res3.r80Result1, Res3.r80Result2 };
5491 GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
5492 }
5493 }
5494 }
5495 }
5496 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5497 }
5498 return RTEXITCODE_SUCCESS;
5499}
5500DUMP_ALL_FN(FpuUnaryTwoR80, g_aFpuUnaryTwoR80)
5501#endif
5502
5503
/**
 * Replays the recorded fxtract/fptan/fsincos test data, checking the output
 * FSW and both 80-bit results for exact equality (no rounding tolerance).
 */
static void FpuUnaryTwoR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryTwoR80[iFn]))
            continue;

        FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
        uint32_t const                         cTests  = g_aFpuUnaryTwoR80[iFn].cTests;
        PFNIEMAIMPLFPUR80UNARYTWO              pfn     = g_aFpuUnaryTwoR80[iFn].pfn;
        uint32_t const                         cVars   = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                IEMFPURESULTTWO  Res   = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s %s\n"
                                          "%s expected %#06x %s %s %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
                                 iVar ? "  " : "", paTests[iTest].fFswOut,
                                 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
            }
            pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
        }

        FREE_DECOMPRESSED_TESTS(g_aFpuUnaryTwoR80[iFn]);
    }
}
5548
5549
5550/*********************************************************************************************************************************
5551* SSE floating point Binary Operations *
5552*********************************************************************************************************************************/
5553
5554/*
5555 * Binary SSE operations on packed single precision floating point values.
5556 */
5557TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5558
/** Subtest table: binary SSE operations on packed single precision values
 *  (workers taking two 128-bit operands). */
static SSE_BINARY_R32_T g_aSseBinaryR32[] =
{
    ENTRY_BIN(addps_u128),
    ENTRY_BIN(mulps_u128),
    ENTRY_BIN(subps_u128),
    ENTRY_BIN(minps_u128),
    ENTRY_BIN(divps_u128),
    ENTRY_BIN(maxps_u128),
    ENTRY_BIN(haddps_u128),
    ENTRY_BIN(hsubps_u128),
    ENTRY_BIN(sqrtps_u128),
    ENTRY_BIN(addsubps_u128),
    ENTRY_BIN(cvtps2pd_u128),
};
5573
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryR32, g_aSseBinaryR32)
/**
 * Generates test data for the packed single precision SSE binary operations.
 *
 * Each input pair is run through the worker with all rounding modes and
 * DAZ/FZ combinations, first with all exceptions masked, then unmasked, and
 * finally with the raised exception flags fed back in (masked and partially
 * unmasked) to capture the flag-accumulation behaviour.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output trouble.
 * @param   cTests          Number of random inputs to generate (min 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseBinaryR32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
          { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
            /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
    {
        /* Prefer generating against the native worker when available. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the hand-picked specials appended after cTests. */
            TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
            TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];

            TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
            TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
            TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
            TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];

            /* Re-roll the tail of the random range until enough all-normal pairs were produced. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        X86XMMREG ResM; RT_ZERO(ResM);
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &ResM, &TestData.InVal1, &TestData.InVal2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        TestData.OutVal    = ResM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        X86XMMREG ResU; RT_ZERO(ResU);
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &ResU, &TestData.InVal1, &TestData.InVal2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        TestData.OutVal    = ResU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: feed the raised flags back in (still unmasked). */
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            X86XMMREG Res1; RT_ZERO(Res1);
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &Res1, &TestData.InVal1, &TestData.InVal2);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            TestData.OutVal    = Res1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Pass 4: if new flags appeared, repeat with them masked. */
                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                X86XMMREG Res2; RT_ZERO(Res2);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &Res2, &TestData.InVal1, &TestData.InVal2);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                TestData.OutVal    = Res2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: unmask each raised exception individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        X86XMMREG Res3; RT_ZERO(Res3);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &Res3, &TestData.InVal1, &TestData.InVal2);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        TestData.OutVal    = Res3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5689
5690static void SseBinaryR32Test(void)
5691{
5692 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
5693 {
5694 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32[iFn]))
5695 continue;
5696
5697 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
5698 uint32_t const cbTests = g_aSseBinaryR32[iFn].cTests;
5699 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
5700 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
5701 if (!cbTests) RTTestSkipped(g_hTest, "no tests");
5702 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5703 {
5704 for (uint32_t iTest = 0; iTest < cbTests / sizeof(paTests[0]); iTest++)
5705 {
5706 X86XMMREG Res; RT_ZERO(Res);
5707
5708 uint32_t uMxCsrOut = pfn(paTests[iTest].fMxcsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
5709 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
5710 && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
5711 && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
5712 && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5713 if ( uMxCsrOut != paTests[iTest].fMxcsrOut
5714 || !fValsIdentical)
5715 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
5716 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5717 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5718 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5719 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5720 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5721 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
5722 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
5723 iVar ? " " : "", uMxCsrOut,
5724 FormatR32(&Res.ar32[0]), FormatR32(&Res.ar32[1]),
5725 FormatR32(&Res.ar32[2]), FormatR32(&Res.ar32[3]),
5726 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5727 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5728 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5729 MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
5730 !fValsIdentical ? " - val" : "",
5731 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5732 }
5733 pfn = g_aSseBinaryR32[iFn].pfnNative;
5734 }
5735
5736 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32[iFn]);
5737 }
5738}
5739
5740
5741/*
5742 * Binary SSE operations on packed single precision floating point values.
5743 */
5744TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5745
/** Subtest table: binary SSE2 operations on packed double precision values
 *  (workers taking two 128-bit operands). */
static SSE_BINARY_R64_T g_aSseBinaryR64[] =
{
    ENTRY_BIN(addpd_u128),
    ENTRY_BIN(mulpd_u128),
    ENTRY_BIN(subpd_u128),
    ENTRY_BIN(minpd_u128),
    ENTRY_BIN(divpd_u128),
    ENTRY_BIN(maxpd_u128),
    ENTRY_BIN(haddpd_u128),
    ENTRY_BIN(hsubpd_u128),
    ENTRY_BIN(sqrtpd_u128),
    ENTRY_BIN(addsubpd_u128),
    ENTRY_BIN(cvtpd2ps_u128),
};
5760
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryR64, g_aSseBinaryR64) /* Was passing g_aSseBinaryR32 (copy & paste), dumping the wrong table. */
/**
 * Generates test data for the packed double precision SSE binary operations.
 *
 * Mirrors SseBinaryR32Generate: every input pair is run with all rounding
 * modes and DAZ/FZ combinations, first fully masked, then unmasked, and then
 * with raised exception flags fed back in (masked and individually unmasked).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output trouble.
 * @param   cTests          Number of random inputs to generate (min 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseBinaryR64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* NOTE(review): 8388607 is the float32 max fraction carried over from the
       R32 generator; still a valid (if small) float64 fraction. */
    static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
          { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
            /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
    {
        /* Prefer generating against the native worker when available. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);

            /* Element 1 now picks aVal1[1]/aVal2[1]; previously both elements used
               index [0] (copy & paste) - benign today since both special entries are
               identical, but wrong once distinct specials are added. */
            TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
            TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];

            /* Re-roll the tail of the random range until enough all-normal pairs were produced. */
            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        X86XMMREG ResM; RT_ZERO(ResM);
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &ResM, &TestData.InVal1, &TestData.InVal2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        TestData.OutVal    = ResM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        X86XMMREG ResU; RT_ZERO(ResU);
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &ResU, &TestData.InVal1, &TestData.InVal2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        TestData.OutVal    = ResU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: feed the raised flags back in (still unmasked). */
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            X86XMMREG Res1; RT_ZERO(Res1);
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &Res1, &TestData.InVal1, &TestData.InVal2);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            TestData.OutVal    = Res1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Pass 4: if new flags appeared, repeat with them masked. */
                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                X86XMMREG Res2; RT_ZERO(Res2);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &Res2, &TestData.InVal1, &TestData.InVal2);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                TestData.OutVal    = Res2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: unmask each raised exception individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        X86XMMREG Res3; RT_ZERO(Res3);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &Res3, &TestData.InVal1, &TestData.InVal2);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        TestData.OutVal    = Res3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5869
5870
/**
 * Conformance test for the packed double precision SSE binary operations
 * (addpd & friends), driven by the pre-generated test vectors.
 */
static void SseBinaryR64Test(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64[iFn]))
            continue;

        SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
        uint32_t const cTests = g_aSseBinaryR64[iFn].cTests;
        PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                X86XMMREG Res; RT_ZERO(Res);

                uint32_t uMxCsrIn = paTests[iTest].fMxcsrIn;
                uint32_t uMxCsrOut = pfn(uMxCsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
                /* Both double-precision lanes and the output MXCSR must match. */
                if (   uMxCsrOut != paTests[iTest].fMxcsrOut
                    || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
                    || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
                                          "%s -> mxcsr=%#08x %s'%s\n"
                                          "%s expected %#08x %s'%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
                                 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
                                 iVar ? " " : "", uMxCsrOut,
                                 FormatR64(&Res.ar64[0]), FormatR64(&Res.ar64[1]),
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
                                 MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
                                 (   !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
                                  || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn) );
            }
            /* A second variation (when present) exercises the native implementation. */
            pfn = g_aSseBinaryR64[iFn].pfnNative;
        }

        FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64[iFn]);
    }
}
5916
5917
5918/*
5919 * Binary SSE operations on packed single precision floating point values.
5920 */
5921TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);
5922
/** Subtest table: scalar single precision SSE operations taking a 128-bit
 *  first operand and a 32-bit float second operand (xxxss xmm1, r/m32). */
static SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
{
    ENTRY_BIN(addss_u128_r32),
    ENTRY_BIN(mulss_u128_r32),
    ENTRY_BIN(subss_u128_r32),
    ENTRY_BIN(minss_u128_r32),
    ENTRY_BIN(divss_u128_r32),
    ENTRY_BIN(maxss_u128_r32),
    ENTRY_BIN(cvtss2sd_u128_r32),
    ENTRY_BIN(sqrtss_u128_r32),
};
5934
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryU128R32, g_aSseBinaryU128R32)
/**
 * Generates test data for the scalar single precision SSE operations
 * (xxxss xmm1, r/m32).
 *
 * Same pass structure as SseBinaryR32Generate: all rounding/DAZ/FZ
 * combinations, masked then unmasked, then raised exception flags fed back
 * in (masked and individually unmasked).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output trouble.
 * @param   cTests          Number of random inputs to generate (min 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseBinaryU128R32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
            /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
    {
        /* Prefer generating against the native worker when available. */
        PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative ? g_aSseBinaryU128R32[iFn].pfnNative : g_aSseBinaryU128R32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryU128R32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the hand-picked specials appended after cTests. */
            TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
            TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];

            TestData.r32Val2 = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].Val2;

            /* Re-roll the tail of the random range until enough all-normal inputs were produced. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
                && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        X86XMMREG ResM; RT_ZERO(ResM);
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &ResM, &TestData.InVal1, &TestData.r32Val2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        TestData.OutVal    = ResM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        X86XMMREG ResU; RT_ZERO(ResU);
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &ResU, &TestData.InVal1, &TestData.r32Val2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        TestData.OutVal    = ResU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: feed the raised flags back in (still unmasked). */
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            X86XMMREG Res1; RT_ZERO(Res1);
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &Res1, &TestData.InVal1, &TestData.r32Val2);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            TestData.OutVal    = Res1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Pass 4: if new flags appeared, repeat with them masked. */
                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                X86XMMREG Res2; RT_ZERO(Res2);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &Res2, &TestData.InVal1, &TestData.r32Val2);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                TestData.OutVal    = Res2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: unmask each raised exception individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        X86XMMREG Res3; RT_ZERO(Res3);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &Res3, &TestData.InVal1, &TestData.r32Val2);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        TestData.OutVal    = Res3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6047
6048static void SseBinaryU128R32Test(void)
6049{
6050 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
6051 {
6052 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R32[iFn]))
6053 continue;
6054
6055 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
6056 uint32_t const cTests = g_aSseBinaryU128R32[iFn].cTests;
6057 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
6058 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
6059 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6060 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6061 {
6062 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6063 {
6064 X86XMMREG Res; RT_ZERO(Res);
6065
6066 uint32_t uMxCsrIn = paTests[iTest].fMxcsrIn;
6067 uint32_t uMxCsrOut = pfn(uMxCsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
6068 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
6069 && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
6070 && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
6071 && RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]);
6072 if ( uMxCsrOut != paTests[iTest].fMxcsrOut
6073 || !fValsIdentical)
6074 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
6075 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
6076 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
6077 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6078 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
6079 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
6080 FormatR32(&paTests[iTest].r32Val2),
6081 iVar ? " " : "", uMxCsrOut,
6082 FormatR32(&Res.ar32[0]), FormatR32(&Res.ar32[1]),
6083 FormatR32(&Res.ar32[2]), FormatR32(&Res.ar32[3]),
6084 iVar ? " " : "", paTests[iTest].fMxcsrOut,
6085 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
6086 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
6087 MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
6088 !fValsIdentical ? " - val" : "",
6089 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6090 }
6091 }
6092
6093 FREE_DECOMPRESSED_TESTS(g_aSseBinaryU128R32[iFn]);
6094 }
6095}
6096
6097
6098/*
6099 * Binary SSE operations on packed single precision floating point values (xxxsd xmm1, r/m64).
6100 */
6101TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);
6102
/** Subtest table: scalar double precision SSE2 operations taking a 128-bit
 *  first operand and a 64-bit float second operand (xxxsd xmm1, r/m64). */
static SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
{
    ENTRY_BIN(addsd_u128_r64),
    ENTRY_BIN(mulsd_u128_r64),
    ENTRY_BIN(subsd_u128_r64),
    ENTRY_BIN(minsd_u128_r64),
    ENTRY_BIN(divsd_u128_r64),
    ENTRY_BIN(maxsd_u128_r64),
    ENTRY_BIN(cvtsd2ss_u128_r64),
    ENTRY_BIN(sqrtsd_u128_r64),
};
6114
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryU128R64, g_aSseBinaryU128R64)
/**
 * Generates test data for the scalar double precision SSE operations
 * (xxxsd xmm1, r/m64).
 *
 * Same pass structure as the other SSE binary generators: all rounding and
 * DAZ/FZ combinations, masked then unmasked, then raised exception flags fed
 * back in (masked and individually unmasked).  Note that the output value is
 * written straight into TestData.OutVal rather than a local.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output trouble.
 * @param   cTests          Number of random inputs to generate (min 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseBinaryU128R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* NOTE(review): 8388607 is the float32 max fraction carried over from the
       R32 generator; still a valid (if small) float64 fraction. */
    static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
            /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
    {
        /* Prefer generating against the native worker when available. */
        PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative ? g_aSseBinaryU128R64[iFn].pfnNative : g_aSseBinaryU128R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryU128R64[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the hand-picked specials appended after cTests. */
            TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.r64Val2 = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].Val2;

            /* Re-roll the tail of the random range until enough all-normal inputs were produced. */
            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: feed the raised flags back in (still unmasked). */
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Pass 4: if new flags appeared, repeat with them masked. */
                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Pass 5: unmask each raised exception individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.InVal1, &TestData.r64Val2);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6211
6212
6213static void SseBinaryU128R64Test(void)
6214{
6215 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
6216 {
6217 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R64[iFn]))
6218 continue;
6219
6220 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
6221 uint32_t const cTests = g_aSseBinaryU128R64[iFn].cTests;
6222 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
6223 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
6224 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6225 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6226 {
6227 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6228 {
6229 X86XMMREG Res; RT_ZERO(Res);
6230
6231 uint32_t uMxCsrIn = paTests[iTest].fMxcsrIn;
6232 uint32_t uMxCsrOut = pfn(uMxCsrIn, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
6233 if ( uMxCsrOut != paTests[iTest].fMxcsrOut
6234 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
6235 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
6236 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
6237 "%s -> mxcsr=%#08x %s'%s\n"
6238 "%s expected %#08x %s'%s%s%s (%s)\n",
6239 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6240 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
6241 FormatR64(&paTests[iTest].r64Val2),
6242 iVar ? " " : "", uMxCsrOut,
6243 FormatR64(&Res.ar64[0]), FormatR64(&Res.ar64[1]),
6244 iVar ? " " : "", paTests[iTest].fMxcsrOut,
6245 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
6246 MxcsrDiff(uMxCsrOut, paTests[iTest].fMxcsrOut),
6247 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
6248 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
6249 ? " - val" : "",
6250 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6251 }
6252 }
6253
6254 FREE_DECOMPRESSED_TESTS(g_aSseBinaryU128R64[iFn]);
6255 }
6256}
6257
6258
6259/*
6260 * SSE operations converting single double-precision floating point values to signed double-word integers (cvttsd2si and friends).
6261 */
6262TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R64_T, SSE_BINARY_I32_R64_TEST_T, PFNIEMAIMPLSSEF2I32U64);
6263
/** Subtest table: SSE2 conversions of a scalar double to a signed
 *  double-word integer (cvttsd2si/cvtsd2si). */
static SSE_BINARY_I32_R64_T g_aSseBinaryI32R64[] =
{
    ENTRY_BIN(cvttsd2si_i32_r64),
    ENTRY_BIN(cvtsd2si_i32_r64),
};
6269
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryI32R64, g_aSseBinaryI32R64)
/**
 * Generates binary test data for the r64 -> i32 conversions (cvttsd2si/cvtsd2si).
 *
 * For each input value every rounding mode and DAZ/FZ combination is run with
 * exceptions masked and unmasked, and extra variants are produced when
 * exception flags get raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the output file
 *          could not be opened or closed.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings for GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseBinaryI32R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
            /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
    {
        /* Prefer the native worker when present so the recorded data matches real hardware. */
        PFNIEMAIMPLSSEF2I32U64 const pfn = g_aSseBinaryI32R64[iFn].pfnNative ? g_aSseBinaryI32R64[iFn].pfnNative : g_aSseBinaryI32R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI32R64[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I32_R64_TEST_T TestData; RT_ZERO(TestData);

            TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Guarantee a minimum number of normal inputs by re-rolling
               non-normals towards the end of the random range. */
            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            /* Sweep all four rounding modes and the DAZ/FZ flag combinations. */
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st run: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; int32_t i32OutM;
                        fMxcsrM = pfn(uMxCsrIn, &i32OutM, &TestData.r64ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.i32ValOut = i32OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2nd run: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; int32_t i32OutU;
                        fMxcsrU = pfn(uMxCsrIn, &i32OutU, &TestData.r64ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.i32ValOut = i32OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If exception flags were raised, record a variant with
                           those flags already set on input. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; int32_t i32Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &i32Out1, &TestData.r64ValIn.u);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.i32ValOut = i32Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* If additional flags showed up, mask exactly the raised set and rerun. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; int32_t i32Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &i32Out2, &TestData.r64ValIn.u);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.i32ValOut = i32Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, unmask each raised exception bit individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; int32_t i32Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &i32Out3, &TestData.r64ValIn.u);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.i32ValOut = i32Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6373
6374
6375static void SseBinaryI32R64Test(void)
6376{
6377 X86FXSTATE State;
6378 RT_ZERO(State);
6379 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
6380 {
6381 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R64[iFn]))
6382 continue;
6383
6384 SSE_BINARY_I32_R64_TEST_T const * const paTests = g_aSseBinaryI32R64[iFn].paTests;
6385 uint32_t const cTests = g_aSseBinaryI32R64[iFn].cTests;
6386 PFNIEMAIMPLSSEF2I32U64 pfn = g_aSseBinaryI32R64[iFn].pfn;
6387 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
6388 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6389 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6390 {
6391 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6392 {
6393 int32_t i32Dst = 0;
6394
6395 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i32Dst, &paTests[iTest].r64ValIn.u);
6396 if ( fMxcsr != paTests[iTest].fMxcsrOut
6397 || i32Dst != paTests[iTest].i32ValOut)
6398 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6399 "%s -> mxcsr=%#08x %RI32\n"
6400 "%s expected %#08x %RI32%s%s (%s)\n",
6401 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6402 FormatR64(&paTests[iTest].r64ValIn),
6403 iVar ? " " : "", fMxcsr, i32Dst,
6404 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
6405 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6406 i32Dst != paTests[iTest].i32ValOut
6407 ? " - val" : "",
6408 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6409 }
6410 }
6411
6412 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI32R64[iFn]);
6413 }
6414}
6415
6416
6417/*
6418 * SSE operations converting single double-precision floating point values to signed quad-word integers (cvttsd2si and friends).
6419 */
6420TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R64_T, SSE_BINARY_I64_R64_TEST_T, PFNIEMAIMPLSSEF2I64U64);
6421
6422static SSE_BINARY_I64_R64_T g_aSseBinaryI64R64[] =
6423{
6424 ENTRY_BIN(cvttsd2si_i64_r64),
6425 ENTRY_BIN(cvtsd2si_i64_r64),
6426};
6427
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryI64R64, g_aSseBinaryI64R64)
/**
 * Generates binary test data for the r64 -> i64 conversions (cvttsd2si/cvtsd2si).
 *
 * Same scheme as SseBinaryI32R64Generate: every rounding mode and DAZ/FZ
 * combination with exceptions masked and unmasked, plus follow-up variants
 * when exception flags are raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings for GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseBinaryI64R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
            /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
    {
        /* Prefer the native worker when present so the recorded data matches real hardware. */
        PFNIEMAIMPLSSEF2I64U64 const pfn = g_aSseBinaryI64R64[iFn].pfnNative ? g_aSseBinaryI64R64[iFn].pfnNative : g_aSseBinaryI64R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI64R64[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I64_R64_TEST_T TestData; RT_ZERO(TestData);

            TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Guarantee a minimum number of normal inputs by re-rolling
               non-normals towards the end of the random range. */
            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            /* Sweep all four rounding modes and the DAZ/FZ flag combinations. */
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st run: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; int64_t i64OutM;
                        fMxcsrM = pfn(uMxCsrIn, &i64OutM, &TestData.r64ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.i64ValOut = i64OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2nd run: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; int64_t i64OutU;
                        fMxcsrU = pfn(uMxCsrIn, &i64OutU, &TestData.r64ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.i64ValOut = i64OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If exception flags were raised, record a variant with
                           those flags already set on input. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; int64_t i64Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &i64Out1, &TestData.r64ValIn.u);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.i64ValOut = i64Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* If additional flags showed up, mask exactly the raised set and rerun. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; int64_t i64Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &i64Out2, &TestData.r64ValIn.u);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.i64ValOut = i64Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, unmask each raised exception bit individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; int64_t i64Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &i64Out3, &TestData.r64ValIn.u);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.i64ValOut = i64Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6531
6532
6533static void SseBinaryI64R64Test(void)
6534{
6535 X86FXSTATE State;
6536 RT_ZERO(State);
6537 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
6538 {
6539 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R64[iFn]))
6540 continue;
6541
6542 SSE_BINARY_I64_R64_TEST_T const * const paTests = g_aSseBinaryI64R64[iFn].paTests;
6543 uint32_t const cTests = g_aSseBinaryI64R64[iFn].cTests;
6544 PFNIEMAIMPLSSEF2I64U64 pfn = g_aSseBinaryI64R64[iFn].pfn;
6545 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
6546 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6547 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6548 {
6549 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6550 {
6551 int64_t i64Dst = 0;
6552 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i64Dst, &paTests[iTest].r64ValIn.u);
6553 if ( fMxcsr != paTests[iTest].fMxcsrOut
6554 || i64Dst != paTests[iTest].i64ValOut)
6555 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6556 "%s -> mxcsr=%#08x %RI64\n"
6557 "%s expected %#08x %RI64%s%s (%s)\n",
6558 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6559 FormatR64(&paTests[iTest].r64ValIn),
6560 iVar ? " " : "", fMxcsr, i64Dst,
6561 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6562 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6563 i64Dst != paTests[iTest].i64ValOut
6564 ? " - val" : "",
6565 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6566 }
6567 }
6568
6569 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI64R64[iFn]);
6570 }
6571}
6572
6573
6574/*
6575 * SSE operations converting single single-precision floating point values to signed double-word integers (cvttss2si and friends).
6576 */
6577TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R32_T, SSE_BINARY_I32_R32_TEST_T, PFNIEMAIMPLSSEF2I32U32);
6578
6579static SSE_BINARY_I32_R32_T g_aSseBinaryI32R32[] =
6580{
6581 ENTRY_BIN(cvttss2si_i32_r32),
6582 ENTRY_BIN(cvtss2si_i32_r32),
6583};
6584
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryI32R32, g_aSseBinaryI32R32)
/**
 * Generates binary test data for the r32 -> i32 conversions (cvttss2si/cvtss2si).
 *
 * Same scheme as SseBinaryI32R64Generate, just with single-precision inputs.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings for GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseBinaryI32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
            /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
    {
        /* Prefer the native worker when present so the recorded data matches real hardware. */
        PFNIEMAIMPLSSEF2I32U32 const pfn = g_aSseBinaryI32R32[iFn].pfnNative ? g_aSseBinaryI32R32[iFn].pfnNative : g_aSseBinaryI32R32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI32R32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I32_R32_TEST_T TestData; RT_ZERO(TestData);

            TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Guarantee a minimum number of normal inputs by re-rolling
               non-normals towards the end of the random range. */
            if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            /* Sweep all four rounding modes and the DAZ/FZ flag combinations. */
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st run: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; int32_t i32OutM;
                        fMxcsrM = pfn(uMxCsrIn, &i32OutM, &TestData.r32ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.i32ValOut = i32OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2nd run: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; int32_t i32OutU;
                        fMxcsrU = pfn(uMxCsrIn, &i32OutU, &TestData.r32ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.i32ValOut = i32OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If exception flags were raised, record a variant with
                           those flags already set on input. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; int32_t i32Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &i32Out1, &TestData.r32ValIn.u);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.i32ValOut = i32Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* If additional flags showed up, mask exactly the raised set and rerun. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; int32_t i32Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &i32Out2, &TestData.r32ValIn.u);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.i32ValOut = i32Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, unmask each raised exception bit individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; int32_t i32Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &i32Out3, &TestData.r32ValIn.u);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.i32ValOut = i32Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6688
6689
6690static void SseBinaryI32R32Test(void)
6691{
6692 X86FXSTATE State;
6693 RT_ZERO(State);
6694 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
6695 {
6696 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R32[iFn]))
6697 continue;
6698
6699 SSE_BINARY_I32_R32_TEST_T const * const paTests = g_aSseBinaryI32R32[iFn].paTests;
6700 uint32_t const cTests = g_aSseBinaryI32R32[iFn].cTests;
6701 PFNIEMAIMPLSSEF2I32U32 pfn = g_aSseBinaryI32R32[iFn].pfn;
6702 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R32[iFn]);
6703 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6704 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6705 {
6706 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6707 {
6708 int32_t i32Dst = 0;
6709
6710 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i32Dst, &paTests[iTest].r32ValIn.u);
6711 if ( fMxcsr != paTests[iTest].fMxcsrOut
6712 || i32Dst != paTests[iTest].i32ValOut)
6713 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6714 "%s -> mxcsr=%#08x %RI32\n"
6715 "%s expected %#08x %RI32%s%s (%s)\n",
6716 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6717 FormatR32(&paTests[iTest].r32ValIn),
6718 iVar ? " " : "", fMxcsr, i32Dst,
6719 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
6720 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6721 i32Dst != paTests[iTest].i32ValOut
6722 ? " - val" : "",
6723 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6724 }
6725 }
6726
6727 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI32R32[iFn]);
6728 }
6729}
6730
6731
6732/*
6733 * SSE operations converting single single-precision floating point values to signed quad-word integers (cvttss2si and friends).
6734 */
6735TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R32_T, SSE_BINARY_I64_R32_TEST_T, PFNIEMAIMPLSSEF2I64U32);
6736
6737static SSE_BINARY_I64_R32_T g_aSseBinaryI64R32[] =
6738{
6739 ENTRY_BIN(cvttss2si_i64_r32),
6740 ENTRY_BIN(cvtss2si_i64_r32),
6741};
6742
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryI64R32, g_aSseBinaryI64R32)
/**
 * Generates binary test data for the r32 -> i64 conversions (cvttss2si/cvtss2si).
 *
 * Bugfix: the recorded fMxcsrIn was taken from a zeroed, never-updated local
 * X86FXSTATE (State.MXCSR == 0) in all five places — a leftover from before
 * the workers took MXCSR as an argument.  It now records the uMxCsrIn value
 * actually passed to the worker, like the sibling generators do, and the
 * unused State local is gone.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings for GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseBinaryI64R32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
            /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
    {
        /* Prefer the native worker when present so the recorded data matches real hardware. */
        PFNIEMAIMPLSSEF2I64U32 const pfn = g_aSseBinaryI64R32[iFn].pfnNative ? g_aSseBinaryI64R32[iFn].pfnNative : g_aSseBinaryI64R32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI64R32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I64_R32_TEST_T TestData; RT_ZERO(TestData);

            TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Guarantee a minimum number of normal inputs by re-rolling
               non-normals towards the end of the random range. */
            if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            /* Sweep all four rounding modes and the DAZ/FZ flag combinations. */
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st run: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; int64_t i64OutM;
                        fMxcsrM = pfn(uMxCsrIn, &i64OutM, &TestData.r32ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn; /* was State.MXCSR (always zero) */
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.i64ValOut = i64OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2nd run: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; int64_t i64OutU;
                        fMxcsrU = pfn(uMxCsrIn, &i64OutU, &TestData.r32ValIn.u);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.i64ValOut = i64OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If exception flags were raised, record a variant with
                           those flags already set on input. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; int64_t i64Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &i64Out1, &TestData.r32ValIn.u);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.i64ValOut = i64Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* If additional flags showed up, mask exactly the raised set and rerun. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; int64_t i64Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &i64Out2, &TestData.r32ValIn.u);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.i64ValOut = i64Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, unmask each raised exception bit individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; int64_t i64Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &i64Out3, &TestData.r32ValIn.u);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.i64ValOut = i64Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6848
6849
6850static void SseBinaryI64R32Test(void)
6851{
6852 X86FXSTATE State;
6853 RT_ZERO(State);
6854 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6855 {
6856 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R32[iFn]))
6857 continue;
6858
6859 SSE_BINARY_I64_R32_TEST_T const * const paTests = g_aSseBinaryI64R32[iFn].paTests;
6860 uint32_t const cTests = g_aSseBinaryI64R32[iFn].cTests;
6861 PFNIEMAIMPLSSEF2I64U32 pfn = g_aSseBinaryI64R32[iFn].pfn;
6862 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R32[iFn]);
6863 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6864 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6865 {
6866 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6867 {
6868 int64_t i64Dst = 0;
6869
6870 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &i64Dst, &paTests[iTest].r32ValIn.u);
6871 if ( fMxcsr != paTests[iTest].fMxcsrOut
6872 || i64Dst != paTests[iTest].i64ValOut)
6873 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6874 "%s -> mxcsr=%#08x %RI64\n"
6875 "%s expected %#08x %RI64%s%s (%s)\n",
6876 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6877 FormatR32(&paTests[iTest].r32ValIn),
6878 iVar ? " " : "", fMxcsr, i64Dst,
6879 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6880 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6881 i64Dst != paTests[iTest].i64ValOut
6882 ? " - val" : "",
6883 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6884 }
6885 }
6886
6887 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI64R32[iFn]);
6888 }
6889}
6890
6891
6892/*
6893 * SSE operations converting single signed double-word integers to double-precision floating point values (probably only cvtsi2sd).
6894 */
6895TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I32_T, SSE_BINARY_R64_I32_TEST_T, PFNIEMAIMPLSSEF2R64I32);
6896
6897static SSE_BINARY_R64_I32_T g_aSseBinaryR64I32[] =
6898{
6899 ENTRY_BIN(cvtsi2sd_r64_i32)
6900};
6901
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryR64I32, g_aSseBinaryR64I32)
/**
 * Generates binary test data for the i32 -> r64 conversion (cvtsi2sd).
 *
 * Integer inputs need no normal/denormal balancing, so unlike the FP-input
 * generators there is no minimum-normals re-roll loop here.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings for GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseBinaryR64I32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MAX,
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
    {
        /* Prefer the native worker when present so the recorded data matches real hardware. */
        PFNIEMAIMPLSSEF2R64I32 const pfn = g_aSseBinaryR64I32[iFn].pfnNative ? g_aSseBinaryR64I32[iFn].pfnNative : g_aSseBinaryR64I32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64I32[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R64_I32_TEST_T TestData; RT_ZERO(TestData);

            TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            /* Sweep all four rounding modes and the DAZ/FZ flag combinations. */
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st run: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; RTFLOAT64U r64OutM;
                        fMxcsrM = pfn(uMxCsrIn, &r64OutM, &TestData.i32ValIn);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.r64ValOut = r64OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2nd run: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; RTFLOAT64U r64OutU;
                        fMxcsrU = pfn(uMxCsrIn, &r64OutU, &TestData.i32ValIn);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.r64ValOut = r64OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If exception flags were raised, record a variant with
                           those flags already set on input. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; RTFLOAT64U r64Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &r64Out1, &TestData.i32ValIn);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.r64ValOut = r64Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* If additional flags showed up, mask exactly the raised set and rerun. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; RTFLOAT64U r64Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &r64Out2, &TestData.i32ValIn);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.r64ValOut = r64Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, unmask each raised exception bit individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; RTFLOAT64U r64Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &r64Out3, &TestData.i32ValIn);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.r64ValOut = r64Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6996
6997
6998static void SseBinaryR64I32Test(void)
6999{
7000 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
7001 {
7002 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I32[iFn]))
7003 continue;
7004
7005 SSE_BINARY_R64_I32_TEST_T const * const paTests = g_aSseBinaryR64I32[iFn].paTests;
7006 uint32_t const cTests = g_aSseBinaryR64I32[iFn].cTests;
7007 PFNIEMAIMPLSSEF2R64I32 pfn = g_aSseBinaryR64I32[iFn].pfn;
7008 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I32[iFn]);
7009 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7010 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7011 {
7012 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7013 {
7014 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
7015
7016 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r64Dst, &paTests[iTest].i32ValIn);
7017 if ( fMxcsr != paTests[iTest].fMxcsrOut
7018 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
7019 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
7020 "%s -> mxcsr=%#08x %s\n"
7021 "%s expected %#08x %s%s%s (%s)\n",
7022 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7023 &paTests[iTest].i32ValIn,
7024 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
7025 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
7026 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7027 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
7028 ? " - val" : "",
7029 FormatMxcsr(paTests[iTest].fMxcsrIn) );
7030 }
7031 }
7032
7033 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64I32[iFn]);
7034 }
7035}
7036
7037
7038/*
7039 * SSE operations converting single signed quad-word integers to double-precision floating point values (probably only cvtsi2sd).
7040 */
7041TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I64_T, SSE_BINARY_R64_I64_TEST_T, PFNIEMAIMPLSSEF2R64I64);
7042
7043static SSE_BINARY_R64_I64_T g_aSseBinaryR64I64[] =
7044{
7045 ENTRY_BIN(cvtsi2sd_r64_i64),
7046};
7047
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryR64I64, g_aSseBinaryR64I64)
/**
 * Generates binary test data for the i64 -> r64 conversion (cvtsi2sd).
 *
 * Integer inputs need no normal/denormal balancing, so there is no
 * minimum-normals re-roll loop here.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings for GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseBinaryR64I64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
    {
        /* Prefer the native worker when present so the recorded data matches real hardware. */
        PFNIEMAIMPLSSEF2R64I64 const pfn = g_aSseBinaryR64I64[iFn].pfnNative ? g_aSseBinaryR64I64[iFn].pfnNative : g_aSseBinaryR64I64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64I64[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R64_I64_TEST_T TestData; RT_ZERO(TestData);

            TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];

            /* Sweep all four rounding modes and the DAZ/FZ flag combinations. */
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st run: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; RTFLOAT64U r64OutM;
                        fMxcsrM = pfn(uMxCsrIn, &r64OutM, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.r64ValOut = r64OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* 2nd run: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; RTFLOAT64U r64OutU;
                        fMxcsrU = pfn(uMxCsrIn, &r64OutU, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.r64ValOut = r64OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If exception flags were raised, record a variant with
                           those flags already set on input. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; RTFLOAT64U r64Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &r64Out1, &TestData.i64ValIn);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.r64ValOut = r64Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* If additional flags showed up, mask exactly the raised set and rerun. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; RTFLOAT64U r64Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &r64Out2, &TestData.i64ValIn);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.r64ValOut = r64Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, unmask each raised exception bit individually. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; RTFLOAT64U r64Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &r64Out3, &TestData.i64ValIn);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.r64ValOut = r64Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7142
7143
7144static void SseBinaryR64I64Test(void)
7145{
7146 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
7147 {
7148 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I64[iFn]))
7149 continue;
7150
7151 SSE_BINARY_R64_I64_TEST_T const * const paTests = g_aSseBinaryR64I64[iFn].paTests;
7152 uint32_t const cTests = g_aSseBinaryR64I64[iFn].cTests;
7153 PFNIEMAIMPLSSEF2R64I64 pfn = g_aSseBinaryR64I64[iFn].pfn;
7154 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I64[iFn]);
7155 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7156 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7157 {
7158 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7159 {
7160 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
7161
7162 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r64Dst, &paTests[iTest].i64ValIn);
7163 if ( fMxcsr != paTests[iTest].fMxcsrOut
7164 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
7165 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
7166 "%s -> mxcsr=%#08x %s\n"
7167 "%s expected %#08x %s%s%s (%s)\n",
7168 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7169 &paTests[iTest].i64ValIn,
7170 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
7171 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
7172 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7173 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
7174 ? " - val" : "",
7175 FormatMxcsr(paTests[iTest].fMxcsrIn) );
7176 }
7177 }
7178
7179 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64I64[iFn]);
7180 }
7181}
7182
7183
/*
 * SSE operations converting single signed double-word integers to single-precision floating point values (probably only cvtsi2ss).
 */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I32_T, SSE_BINARY_R32_I32_TEST_T, PFNIEMAIMPLSSEF2R32I32);

/** Subtest table: r32 <- i32 conversion workers. */
static SSE_BINARY_R32_I32_T g_aSseBinaryR32I32[] =
{
    ENTRY_BIN(cvtsi2ss_r32_i32),
};
7193
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryR32I32, g_aSseBinaryR32I32)
/**
 * Generates reference test data for the r32 <- i32 conversion workers.
 *
 * Every input is exercised under all four rounding modes and all DAZ/FZ
 * combinations; for each combination the worker runs (1) with all exceptions
 * masked, (2) with all unmasked, and (3+) with the observed exception flags
 * pre-set respectively selectively unmasked, so sticky-flag behaviour is
 * covered too.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseBinaryR32I32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Boundary inputs appended after the random ones. */
    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MAX,
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLSSEF2R32I32 const pfn = g_aSseBinaryR32I32[iFn].pfnNative ? g_aSseBinaryR32I32[iFn].pfnNative : g_aSseBinaryR32I32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32I32[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R32_I32_TEST_T TestData; RT_ZERO(TestData);

            TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; RTFLOAT32U r32OutM;
                        fMxcsrM = pfn(uMxCsrIn, &r32OutM, &TestData.i32ValIn);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.r32ValOut = r32OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; RTFLOAT32U r32OutU;
                        fMxcsrU = pfn(uMxCsrIn, &r32OutU, &TestData.i32ValIn);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.r32ValOut = r32OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If any exception was raised, replay with those flags already set. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; RTFLOAT32U r32Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &r32Out1, &TestData.i32ValIn);
                            TestData.fMxcsrIn = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.r32ValOut = r32Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* New flags appeared: mask exactly the accumulated set and run again. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; RTFLOAT32U r32Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &r32Out2, &TestData.i32ValIn);
                                TestData.fMxcsrIn = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.r32ValOut = r32Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, leave each raised exception bit unmasked one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; RTFLOAT32U r32Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &r32Out3, &TestData.i32ValIn);
                                        TestData.fMxcsrIn = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.r32ValOut = r32Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7288
7289
7290static void SseBinaryR32I32Test(void)
7291{
7292 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
7293 {
7294 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I32[iFn]))
7295 continue;
7296
7297 SSE_BINARY_R32_I32_TEST_T const * const paTests = g_aSseBinaryR32I32[iFn].paTests;
7298 uint32_t const cTests = g_aSseBinaryR32I32[iFn].cTests;
7299 PFNIEMAIMPLSSEF2R32I32 pfn = g_aSseBinaryR32I32[iFn].pfn;
7300 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I32[iFn]);
7301 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7302 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7303 {
7304 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7305 {
7306 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
7307
7308 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r32Dst, &paTests[iTest].i32ValIn);
7309 if ( fMxcsr != paTests[iTest].fMxcsrOut
7310 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
7311 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
7312 "%s -> mxcsr=%#08x %RI32\n"
7313 "%s expected %#08x %RI32%s%s (%s)\n",
7314 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7315 &paTests[iTest].i32ValIn,
7316 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
7317 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
7318 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7319 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
7320 ? " - val" : "",
7321 FormatMxcsr(paTests[iTest].fMxcsrIn) );
7322 }
7323 }
7324
7325 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32I32[iFn]);
7326 }
7327}
7328
7329
/*
 * SSE operations converting single signed quad-word integers to single-precision floating point values (probably only cvtsi2ss).
 */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I64_T, SSE_BINARY_R32_I64_TEST_T, PFNIEMAIMPLSSEF2R32I64);

/** Subtest table: r32 <- i64 conversion workers. */
static SSE_BINARY_R32_I64_T g_aSseBinaryR32I64[] =
{
    ENTRY_BIN(cvtsi2ss_r32_i64),
};
7339
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseBinaryR32I64, g_aSseBinaryR32I64)
/**
 * Generates reference test data for the r32 <- i64 conversion workers.
 *
 * Same scheme as SseBinaryR32I32Generate: each input is run under all
 * rounding/DAZ/FZ combinations, first fully masked, then fully unmasked,
 * then with raised exception flags replayed / selectively unmasked.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseBinaryR32I64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Boundary inputs appended after the random ones. */
    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLSSEF2R32I64 const pfn = g_aSseBinaryR32I64[iFn].pfnNative ? g_aSseBinaryR32I64[iFn].pfnNative : g_aSseBinaryR32I64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32I64[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R32_I64_TEST_T TestData; RT_ZERO(TestData);

            TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; RTFLOAT32U r32OutM;
                        fMxcsrM = pfn(uMxCsrIn, &r32OutM, &TestData.i64ValIn);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.r32ValOut = r32OutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; RTFLOAT32U r32OutU;
                        fMxcsrU = pfn(uMxCsrIn, &r32OutU, &TestData.i64ValIn);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.r32ValOut = r32OutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If any exception was raised, replay with those flags already set. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; RTFLOAT32U r32Out1;
                            fMxcsr1 = pfn(uMxCsrIn, &r32Out1, &TestData.i64ValIn);
                            TestData.fMxcsrIn = uMxCsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.r32ValOut = r32Out1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* New flags appeared: mask exactly the accumulated set and run again. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; RTFLOAT32U r32Out2;
                                fMxcsr2 = pfn(uMxCsrIn, &r32Out2, &TestData.i64ValIn);
                                TestData.fMxcsrIn = uMxCsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.r32ValOut = r32Out2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, leave each raised exception bit unmasked one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; RTFLOAT32U r32Out3;
                                        fMxcsr3 = pfn(uMxCsrIn, &r32Out3, &TestData.i64ValIn);
                                        TestData.fMxcsrIn = uMxCsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.r32ValOut = r32Out3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7434
7435
7436static void SseBinaryR32I64Test(void)
7437{
7438 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
7439 {
7440 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I64[iFn]))
7441 continue;
7442
7443 SSE_BINARY_R32_I64_TEST_T const * const paTests = g_aSseBinaryR32I64[iFn].paTests;
7444 uint32_t const cTests = g_aSseBinaryR32I64[iFn].cTests;
7445 PFNIEMAIMPLSSEF2R32I64 pfn = g_aSseBinaryR32I64[iFn].pfn;
7446 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I64[iFn]);
7447 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7448 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7449 {
7450 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7451 {
7452 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
7453
7454 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &r32Dst, &paTests[iTest].i64ValIn);
7455 if ( fMxcsr != paTests[iTest].fMxcsrOut
7456 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
7457 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
7458 "%s -> mxcsr=%#08x %RI32\n"
7459 "%s expected %#08x %RI32%s%s (%s)\n",
7460 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7461 &paTests[iTest].i64ValIn,
7462 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
7463 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
7464 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7465 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
7466 ? " - val" : "",
7467 FormatMxcsr(paTests[iTest].fMxcsrIn) );
7468 }
7469 }
7470
7471 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32I64[iFn]);
7472 }
7473}
7474
7475
/*
 * Compare SSE operations on single single-precision floating point values - outputting only EFLAGS.
 */
TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R32_R32_T, SSE_COMPARE_EFL_R32_R32_TEST_T, PFNIEMAIMPLF2EFLMXCSRR32R32);

/** Subtest table: scalar single-precision compares setting EFLAGS (comiss/ucomiss and AVX variants). */
static SSE_COMPARE_EFL_R32_R32_T g_aSseCompareEflR32R32[] =
{
    ENTRY_BIN(ucomiss_u128),
    ENTRY_BIN(comiss_u128),
    ENTRY_BIN_AVX(vucomiss_u128),
    ENTRY_BIN_AVX(vcomiss_u128),
};
7488
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseCompareEflR32R32, g_aSseCompareEflR32R32)
/**
 * Generates reference test data for the scalar single-precision EFLAGS
 * compare workers (comiss/ucomiss).
 *
 * Random input pairs (with a minimum quota of normal/normal pairs) plus a
 * few zero/infinity specials are run under all rounding/DAZ/FZ combinations,
 * masked, unmasked, and with raised exception flags replayed.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseCompareEflR32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Signed zero and infinity combinations appended after the random inputs. */
    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
        { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLF2EFLMXCSRR32R32 const pfn = g_aSseCompareEflR32R32[iFn].pfnNative ? g_aSseCompareEflR32R32[iFn].pfnNative : g_aSseCompareEflR32R32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareEflR32R32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_EFL_R32_R32_TEST_T TestData; RT_ZERO(TestData);

            TestData.r32ValIn1 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.r32ValIn2 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            /* Re-roll non-normal pairs near the end if the normal/normal quota isn't met yet. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn1)
                && RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlags = RandEFlags();
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM = fMxcsrIn;
                        uint32_t fEFlagsM = fEFlags;
                        fMxcsrM = pfn(fMxcsrIn, &fEFlagsM, TestData.r32ValIn1, TestData.r32ValIn2);
                        TestData.fMxcsrIn = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.fEflIn = fEFlags;
                        TestData.fEflOut = fEFlagsM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU = fMxcsrIn;
                        uint32_t fEFlagsU = fEFlags;
                        fMxcsrU = pfn(fMxcsrIn, &fEFlagsU, TestData.r32ValIn1, TestData.r32ValIn2);
                        TestData.fMxcsrIn = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.fEflIn = fEFlags;
                        TestData.fEflOut = fEFlagsU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If any exception was raised, replay with those flags already set. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1 = fMxcsrIn;
                            uint32_t fEFlags1 = fEFlags;
                            fMxcsr1 = pfn(fMxcsrIn, &fEFlags1, TestData.r32ValIn1, TestData.r32ValIn2);
                            TestData.fMxcsrIn = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.fEflIn = fEFlags;
                            TestData.fEflOut = fEFlags1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* New flags appeared: mask exactly the accumulated set and run again. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2 = fMxcsrIn;
                                uint32_t fEFlags2 = fEFlags;
                                fMxcsr2 = pfn(fMxcsrIn, &fEFlags2, TestData.r32ValIn1, TestData.r32ValIn2);
                                TestData.fMxcsrIn = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.fEflIn = fEFlags;
                                TestData.fEflOut = fEFlags2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, leave each raised exception bit unmasked one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3 = fMxcsrIn;
                                        uint32_t fEFlags3 = fEFlags;
                                        fMxcsr3 = pfn(fMxcsrIn, &fEFlags3, TestData.r32ValIn1, TestData.r32ValIn2);
                                        TestData.fMxcsrIn = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.fEflIn = fEFlags;
                                        TestData.fEflOut = fEFlags3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7612
7613static void SseCompareEflR32R32Test(void)
7614{
7615 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
7616 {
7617 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR32R32[iFn]))
7618 continue;
7619
7620 SSE_COMPARE_EFL_R32_R32_TEST_T const * const paTests = g_aSseCompareEflR32R32[iFn].paTests;
7621 uint32_t const cTests = g_aSseCompareEflR32R32[iFn].cTests;
7622 PFNIEMAIMPLF2EFLMXCSRR32R32 pfn = g_aSseCompareEflR32R32[iFn].pfn;
7623 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR32R32[iFn]);
7624 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7625 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7626 {
7627 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7628 {
7629 uint32_t fEFlags = paTests[iTest].fEflIn;
7630 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &fEFlags, paTests[iTest].r32ValIn1, paTests[iTest].r32ValIn2);
7631 if ( fMxcsr != paTests[iTest].fMxcsrOut
7632 || fEFlags != paTests[iTest].fEflOut)
7633 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7634 "%s -> mxcsr=%#08x %#08x\n"
7635 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7636 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7637 FormatR32(&paTests[iTest].r32ValIn1), FormatR32(&paTests[iTest].r32ValIn2),
7638 iVar ? " " : "", fMxcsr, fEFlags,
7639 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7640 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7641 FormatMxcsr(paTests[iTest].fMxcsrIn),
7642 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7643 }
7644 }
7645
7646 FREE_DECOMPRESSED_TESTS(g_aSseCompareEflR32R32[iFn]);
7647 }
7648}
7649
7650
/*
 * Compare SSE operations on single double-precision floating point values - outputting only EFLAGS.
 */
TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R64_R64_T, SSE_COMPARE_EFL_R64_R64_TEST_T, PFNIEMAIMPLF2EFLMXCSRR64R64);

/** Subtest table: scalar double-precision compares setting EFLAGS (comisd/ucomisd and AVX variants). */
static SSE_COMPARE_EFL_R64_R64_T g_aSseCompareEflR64R64[] =
{
    ENTRY_BIN(ucomisd_u128),
    ENTRY_BIN(comisd_u128),
    ENTRY_BIN_AVX(vucomisd_u128),
    ENTRY_BIN_AVX(vcomisd_u128)
};
7663
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseCompareEflR64R64, g_aSseCompareEflR64R64)
/**
 * Generates reference test data for the scalar double-precision EFLAGS
 * compare workers (comisd/ucomisd).
 *
 * Same scheme as SseCompareEflR32R32Generate, using R64 inputs.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseCompareEflR64R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Signed zero and infinity combinations appended after the random inputs. */
    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) },
        { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLF2EFLMXCSRR64R64 const pfn = g_aSseCompareEflR64R64[iFn].pfnNative ? g_aSseCompareEflR64R64[iFn].pfnNative : g_aSseCompareEflR64R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareEflR64R64[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_EFL_R64_R64_TEST_T TestData; RT_ZERO(TestData);

            TestData.r64ValIn1 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.r64ValIn2 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            /* Re-roll non-normal pairs near the end if the normal/normal quota isn't met yet. */
            if (   RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn1)
                && RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlags = RandEFlags();
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM = fMxcsrIn;
                        uint32_t fEFlagsM = fEFlags;
                        fMxcsrM = pfn(fMxcsrIn, &fEFlagsM, TestData.r64ValIn1, TestData.r64ValIn2);
                        TestData.fMxcsrIn = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.fEflIn = fEFlags;
                        TestData.fEflOut = fEFlagsM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU = fMxcsrIn;
                        uint32_t fEFlagsU = fEFlags;
                        fMxcsrU = pfn(fMxcsrIn, &fEFlagsU, TestData.r64ValIn1, TestData.r64ValIn2);
                        TestData.fMxcsrIn = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.fEflIn = fEFlags;
                        TestData.fEflOut = fEFlagsU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* If any exception was raised, replay with those flags already set. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1 = fMxcsrIn;
                            uint32_t fEFlags1 = fEFlags;
                            fMxcsr1 = pfn(fMxcsrIn, &fEFlags1, TestData.r64ValIn1, TestData.r64ValIn2);
                            TestData.fMxcsrIn = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.fEflIn = fEFlags;
                            TestData.fEflOut = fEFlags1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* New flags appeared: mask exactly the accumulated set and run again. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2 = fMxcsrIn;
                                uint32_t fEFlags2 = fEFlags;
                                fMxcsr2 = pfn(fMxcsrIn, &fEFlags2, TestData.r64ValIn1, TestData.r64ValIn2);
                                TestData.fMxcsrIn = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.fEflIn = fEFlags;
                                TestData.fEflOut = fEFlags2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Finally, leave each raised exception bit unmasked one at a time. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3 = fMxcsrIn;
                                        uint32_t fEFlags3 = fEFlags;
                                        fMxcsr3 = pfn(fMxcsrIn, &fEFlags3, TestData.r64ValIn1, TestData.r64ValIn2);
                                        TestData.fMxcsrIn = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.fEflIn = fEFlags;
                                        TestData.fEflOut = fEFlags3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7787
7788static void SseCompareEflR64R64Test(void)
7789{
7790 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
7791 {
7792 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR64R64[iFn]))
7793 continue;
7794
7795 SSE_COMPARE_EFL_R64_R64_TEST_T const * const paTests = g_aSseCompareEflR64R64[iFn].paTests;
7796 uint32_t const cTests = g_aSseCompareEflR64R64[iFn].cTests;
7797 PFNIEMAIMPLF2EFLMXCSRR64R64 pfn = g_aSseCompareEflR64R64[iFn].pfn;
7798 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR64R64[iFn]);
7799 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7800 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7801 {
7802 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7803 {
7804 uint32_t fEFlags = paTests[iTest].fEflIn;
7805 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &fEFlags, paTests[iTest].r64ValIn1, paTests[iTest].r64ValIn2);
7806 if ( fMxcsr != paTests[iTest].fMxcsrOut
7807 || fEFlags != paTests[iTest].fEflOut)
7808 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7809 "%s -> mxcsr=%#08x %#08x\n"
7810 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7811 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7812 FormatR64(&paTests[iTest].r64ValIn1), FormatR64(&paTests[iTest].r64ValIn2),
7813 iVar ? " " : "", fMxcsr, fEFlags,
7814 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7815 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7816 FormatMxcsr(paTests[iTest].fMxcsrIn),
7817 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7818 }
7819 }
7820
7821 FREE_DECOMPRESSED_TESTS(g_aSseCompareEflR64R64[iFn]);
7822 }
7823}
7824
7825
/*
 * Compare SSE operations on packed and single single-precision floating point values - outputting a mask.
 */
/** Maximum immediate to try to keep the testdata size under control (at least a little bit). */
#define SSE_COMPARE_F2_XMM_IMM8_MAX 0x1f

TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_F2_XMM_IMM8_T, SSE_COMPARE_F2_XMM_IMM8_TEST_T, PFNIEMAIMPLMXCSRF2XMMIMM8);

/** Subtest table: packed/scalar single-precision compares producing a mask (cmpps/cmpss). */
static SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR32Imm8[] =
{
    ENTRY_BIN(cmpps_u128),
    ENTRY_BIN(cmpss_u128)
};
7839
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseCompareF2XmmR32Imm8, g_aSseCompareF2XmmR32Imm8)
/**
 * Generates reference test data for the cmpps/cmpss mask-producing compare
 * workers.
 *
 * Each random/special 4x single-precision input pair is combined with every
 * immediate up to SSE_COMPARE_F2_XMM_IMM8_MAX and run under all
 * rounding/DAZ/FZ combinations, masked, unmasked, and with raised exception
 * flags replayed.
 *
 * @returns RTEXITCODE_SUCCESS or RTEXITCODE_FAILURE.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output file name format strings.
 */
static RTEXITCODE SseCompareF2XmmR32Imm8Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Signed zero and infinity combinations appended after the random inputs. */
    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
        { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfnNative ? g_aSseCompareF2XmmR32Imm8[iFn].pfnNative : g_aSseCompareF2XmmR32Imm8[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareF2XmmR32Imm8[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;

            TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
            TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
            TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
            TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            /* Re-roll non-normal inputs near the end if the all-normal quota isn't met yet. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = TestData.InVal1;
            Src.uSrc2 = TestData.InVal2;
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                    for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                        for (uint8_t iFz = 0; iFz < 2; iFz++)
                        {
                            /* Pass 1: all exceptions masked. */
                            uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                              | (iRounding << X86_MXCSR_RC_SHIFT)
                                              | (iDaz ? X86_MXCSR_DAZ : 0)
                                              | (iFz ? X86_MXCSR_FZ : 0)
                                              | X86_MXCSR_XCPT_MASK;
                            X86XMMREG ResM;
                            uint32_t fMxcsrM = pfn(fMxcsrIn, &ResM, &Src, bImm);
                            TestData.fMxcsrIn = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrM;
                            TestData.bImm = bImm;
                            TestData.OutVal = ResM;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* Pass 2: all exceptions unmasked. */
                            fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                            X86XMMREG ResU;
                            uint32_t fMxcsrU = pfn(fMxcsrIn, &ResU, &Src, bImm);
                            TestData.fMxcsrIn = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrU;
                            TestData.bImm = bImm;
                            TestData.OutVal = ResU;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            /* If any exception was raised, replay with those flags already set. */
                            uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                            if (fXcpt)
                            {
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                                X86XMMREG Res1;
                                uint32_t fMxcsr1 = pfn(fMxcsrIn, &Res1, &Src, bImm);
                                TestData.fMxcsrIn = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr1;
                                TestData.bImm = bImm;
                                TestData.OutVal = Res1;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                                /* New flags appeared: mask exactly the accumulated set and run again. */
                                if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                                {
                                    fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                    X86XMMREG Res2;
                                    uint32_t fMxcsr2 = pfn(fMxcsrIn, &Res2, &Src, bImm);
                                    TestData.fMxcsrIn = fMxcsrIn;
                                    TestData.fMxcsrOut = fMxcsr2;
                                    TestData.bImm = bImm;
                                    TestData.OutVal = Res2;
                                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                }
                                /* Finally, leave each raised exception bit unmasked one at a time. */
                                if (!RT_IS_POWER_OF_TWO(fXcpt))
                                    for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                        if (fUnmasked & fXcpt)
                                        {
                                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                            X86XMMREG Res3;
                                            uint32_t fMxcsr3 = pfn(fMxcsrIn, &Res3, &Src, bImm);
                                            TestData.fMxcsrIn = fMxcsrIn;
                                            TestData.fMxcsrOut = fMxcsr3;
                                            TestData.bImm = bImm;
                                            TestData.OutVal = Res3;
                                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                        }
                            }
                        }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7974
7975static void SseCompareF2XmmR32Imm8Test(void)
7976{
7977 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7978 {
7979 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF2XmmR32Imm8[iFn]))
7980 continue;
7981
7982 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR32Imm8[iFn].paTests;
7983 uint32_t const cTests = g_aSseCompareF2XmmR32Imm8[iFn].cTests;
7984 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7985 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR32Imm8[iFn]);
7986 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7987 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7988 {
7989 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7990 {
7991 IEMMEDIAF2XMMSRC Src;
7992 X86XMMREG ValOut;
7993
7994 Src.uSrc1 = paTests[iTest].InVal1;
7995 Src.uSrc2 = paTests[iTest].InVal2;
7996 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, &Src, paTests[iTest].bImm);
7997 if ( fMxcsr != paTests[iTest].fMxcsrOut
7998 || ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7999 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
8000 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
8001 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
8002 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s imm8=%x\n"
8003 "%s -> mxcsr=%#08x %RX32'%RX32'%RX32'%RX32\n"
8004 "%s expected %#08x %RX32'%RX32'%RX32'%RX32%s%s (%s)\n",
8005 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8006 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
8007 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
8008 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
8009 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
8010 paTests[iTest].bImm,
8011 iVar ? " " : "", fMxcsr, ValOut.au32[0], ValOut.au32[1], ValOut.au32[2], ValOut.au32[3],
8012 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8013 paTests[iTest].OutVal.au32[0], paTests[iTest].OutVal.au32[1],
8014 paTests[iTest].OutVal.au32[2], paTests[iTest].OutVal.au32[3],
8015 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8016 ( ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
8017 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
8018 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
8019 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
8020 ? " - val" : "",
8021 FormatMxcsr(paTests[iTest].fMxcsrIn));
8022 }
8023 }
8024
8025 FREE_DECOMPRESSED_TESTS(g_aSseCompareF2XmmR32Imm8[iFn]);
8026 }
8027}
8028
8029
8030/*
8031 * Compare SSE operations on packed and single double-precision floating point values - outputting a mask.
8032 */
/** Double-precision compare workers taking an imm8 comparison predicate. */
static SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR64Imm8[] =
{
    ENTRY_BIN(cmppd_u128), /**< packed double-precision compare */
    ENTRY_BIN(cmpsd_u128)  /**< scalar double-precision compare */
};
8038
8039#ifdef TSTIEMAIMPL_WITH_GENERATOR
8040DUMP_ALL_FN(SseCompareF2XmmR64Imm8, g_aSseCompareF2XmmR64Imm8)
/**
 * Generates binary reference data for the cmppd/cmpsd (double-precision
 * compare w/ imm8 predicate) workers.
 *
 * Each input pair is driven through every imm8 predicate and every
 * rounding/DAZ/FZ MXCSR combination: first with all exceptions masked, then
 * unmasked, and then replayed with the raised exception flags fed back in so
 * flag accumulation can be verified by the consumer.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs to generate (raised to at
 *                          least 192; 144 standard variations exist).
 * @param   papszNameFmts   Filename format strings for the output file.
 */
static RTEXITCODE SseCompareF2XmmR64Imm8Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) },
        { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfnNative ? g_aSseCompareF2XmmR64Imm8[iFn].pfnNative : g_aSseCompareF2XmmR64Imm8[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareF2XmmR64Imm8[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first; the special value pairs come at the end. */
            TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;

            TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
            TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Retry late non-normal random inputs to guarantee a minimum
                   number of all-normal input pairs in the data set. */
                iTest -= 1;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = TestData.InVal1;
            Src.uSrc2 = TestData.InVal2;
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
            for (uint8_t iFz = 0; iFz < 2; iFz++)
            {
                /* Pass 1: all exceptions masked. */
                uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                  | (iRounding << X86_MXCSR_RC_SHIFT)
                                  | (iDaz ? X86_MXCSR_DAZ : 0)
                                  | (iFz ? X86_MXCSR_FZ : 0)
                                  | X86_MXCSR_XCPT_MASK;
                X86XMMREG ResM;
                uint32_t fMxcsrM = pfn(fMxcsrIn, &ResM, &Src, bImm);
                TestData.fMxcsrIn = fMxcsrIn;
                TestData.fMxcsrOut = fMxcsrM;
                TestData.bImm = bImm;
                TestData.OutVal = ResM;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Pass 2: all exceptions unmasked. */
                fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                X86XMMREG ResU;
                uint32_t fMxcsrU = pfn(fMxcsrIn, &ResU, &Src, bImm);
                TestData.fMxcsrIn = fMxcsrIn;
                TestData.fMxcsrOut = fMxcsrU;
                TestData.bImm = bImm;
                TestData.OutVal = ResU;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                if (fXcpt)
                {
                    /* Replay with the raised exception flags already set on input. */
                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                    X86XMMREG Res1;
                    uint32_t fMxcsr1 = pfn(fMxcsrIn, &Res1, &Src, bImm);
                    TestData.fMxcsrIn = fMxcsrIn;
                    TestData.fMxcsrOut = fMxcsr1;
                    TestData.bImm = bImm;
                    TestData.OutVal = Res1;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                    if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                    {
                        /* New flags popped up: mask the accumulated set and go again. */
                        fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                        X86XMMREG Res2;
                        uint32_t fMxcsr2 = pfn(fMxcsrIn, &Res2, &Src, bImm);
                        TestData.fMxcsrIn = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsr2;
                        TestData.bImm = bImm;
                        TestData.OutVal = Res2;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }
                    /* If several exceptions fired, also try each one unmasked
                       individually while the others stay masked. */
                    if (!RT_IS_POWER_OF_TWO(fXcpt))
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            if (fUnmasked & fXcpt)
                            {
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                X86XMMREG Res3;
                                uint32_t fMxcsr3 = pfn(fMxcsrIn, &Res3, &Src, bImm);
                                TestData.fMxcsrIn = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr3;
                                TestData.bImm = bImm;
                                TestData.OutVal = Res3;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
8164#endif
8165
8166static void SseCompareF2XmmR64Imm8Test(void)
8167{
8168 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
8169 {
8170 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF2XmmR64Imm8[iFn]))
8171 continue;
8172
8173 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR64Imm8[iFn].paTests;
8174 uint32_t const cTests = g_aSseCompareF2XmmR64Imm8[iFn].cTests;
8175 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfn;
8176 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR64Imm8[iFn]);
8177 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8178 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8179 {
8180 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8181 {
8182 IEMMEDIAF2XMMSRC Src;
8183 X86XMMREG ValOut;
8184
8185 Src.uSrc1 = paTests[iTest].InVal1;
8186 Src.uSrc2 = paTests[iTest].InVal2;
8187 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, &Src, paTests[iTest].bImm);
8188 if ( fMxcsr != paTests[iTest].fMxcsrOut
8189 || ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
8190 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
8191 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s imm8=%x\n"
8192 "%s -> mxcsr=%#08x %RX64'%RX64\n"
8193 "%s expected %#08x %RX64'%RX64%s%s (%s)\n",
8194 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8195 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
8196 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
8197 paTests[iTest].bImm,
8198 iVar ? " " : "", fMxcsr, ValOut.au64[0], ValOut.au64[1],
8199 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8200 paTests[iTest].OutVal.au64[0], paTests[iTest].OutVal.au64[1],
8201 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8202 ( ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
8203 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
8204 ? " - val" : "",
8205 FormatMxcsr(paTests[iTest].fMxcsrIn));
8206 }
8207 }
8208
8209 FREE_DECOMPRESSED_TESTS(g_aSseCompareF2XmmR64Imm8[iFn]);
8210 }
8211}
8212
8213
8214/*
8215 * Convert SSE operations converting signed double-words to single-precision floating point values.
8216 */
/* Subtest type for XMM conversion workers: SSE_CONVERT_XMM_TEST_T records
   driven through a PFNIEMAIMPLFPSSEF2U128 worker function. */
TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_T, SSE_CONVERT_XMM_TEST_T, PFNIEMAIMPLFPSSEF2U128);

/** Workers converting four signed double-words to single-precision values. */
static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R32[] =
{
    ENTRY_BIN(cvtdq2ps_u128)
};
8223
8224#ifdef TSTIEMAIMPL_WITH_GENERATOR
8225DUMP_ALL_FN(SseConvertXmmI32R32, g_aSseConvertXmmI32R32)
/**
 * Generates binary reference data for cvtdq2ps (signed double-words to
 * single-precision) conversion.
 *
 * Each random/special input vector is driven through every rounding/DAZ/FZ
 * MXCSR combination, first masked, then unmasked, then replayed with the
 * raised exception flags fed back in to verify flag accumulation.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs (raised to at least 192;
 *                          144 standard variations exist).
 * @param   papszNameFmts   Filename format strings for the output file.
 */
static RTEXITCODE SseConvertXmmI32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000 /* same value as INT32_MIN */
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R32[iFn].pfnNative ? g_aSseConvertXmmI32R32[iFn].pfnNative : g_aSseConvertXmmI32R32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmI32R32[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first; the special values come at the end. */
            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
            for (uint8_t iFz = 0; iFz < 2; iFz++)
            {
                /* Pass 1: all exceptions masked. */
                uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                  | (iRounding << X86_MXCSR_RC_SHIFT)
                                  | (iDaz ? X86_MXCSR_DAZ : 0)
                                  | (iFz ? X86_MXCSR_FZ : 0)
                                  | X86_MXCSR_XCPT_MASK;
                uint32_t uMxCsrOutM = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                TestData.fMxcsrIn = uMxCsrIn;
                TestData.fMxcsrOut = uMxCsrOutM;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Pass 2: all exceptions unmasked. */
                uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                uint32_t uMxCsrOutU = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                TestData.fMxcsrIn = uMxCsrIn;
                TestData.fMxcsrOut = uMxCsrOutU;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                if (fXcpt)
                {
                    /* Replay with the raised exception flags already set on input. */
                    uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                    uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                    TestData.fMxcsrIn = uMxCsrIn;
                    TestData.fMxcsrOut = uMxCsrOut1;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                    if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                    {
                        /* New flags popped up: mask the accumulated set and go again. */
                        fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                        uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOut2;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }
                    /* If several exceptions fired, also try each one unmasked
                       individually while the others stay masked. */
                    if (!RT_IS_POWER_OF_TWO(fXcpt))
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            if (fUnmasked & fXcpt)
                            {
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                TestData.fMxcsrIn = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut3;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
8314#endif
8315
8316static void SseConvertXmmI32R32Test(void)
8317{
8318 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
8319 {
8320 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R32[iFn]))
8321 continue;
8322
8323 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R32[iFn].paTests;
8324 uint32_t const cTests = g_aSseConvertXmmI32R32[iFn].cTests;
8325 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R32[iFn].pfn;
8326 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R32[iFn]);
8327 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8328 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8329 {
8330 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8331 {
8332 X86XMMREG Res; RT_ZERO(Res);
8333
8334 uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
8335 if ( fMxCsr != paTests[iTest].fMxcsrOut
8336 || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
8337 || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
8338 || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
8339 || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]))
8340 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
8341 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
8342 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
8343 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8344 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8345 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
8346 iVar ? " " : "", fMxCsr,
8347 FormatR32(&Res.ar32[0]), FormatR32(&Res.ar32[1]),
8348 FormatR32(&Res.ar32[2]), FormatR32(&Res.ar32[3]),
8349 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8350 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
8351 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
8352 MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
8353 ( !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[0], &paTests[iTest].OutVal.ar32[0])
8354 || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[1], &paTests[iTest].OutVal.ar32[1])
8355 || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[2], &paTests[iTest].OutVal.ar32[2])
8356 || !RTFLOAT32U_ARE_IDENTICAL(&Res.ar32[3], &paTests[iTest].OutVal.ar32[3]))
8357 ? " - val" : "",
8358 FormatMxcsr(paTests[iTest].fMxcsrIn));
8359 }
8360 }
8361
8362 FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmI32R32[iFn]);
8363 }
8364}
8365
8366
/*
 * Convert SSE operations converting single-precision floating point values to signed double-words.
 */
/** Workers converting packed single-precision values to signed double-words. */
static SSE_CONVERT_XMM_T g_aSseConvertXmmR32I32[] =
{
    ENTRY_BIN(cvtps2dq_u128),
    ENTRY_BIN(cvttps2dq_u128) /**< truncating variant */
};
8375
8376#ifdef TSTIEMAIMPL_WITH_GENERATOR
8377DUMP_ALL_FN(SseConvertXmmR32I32, g_aSseConvertXmmR32I32)
/**
 * Generates binary reference data for cvtps2dq/cvttps2dq (single-precision to
 * signed double-words) conversion.
 *
 * Each random/special input vector is driven through every rounding/DAZ/FZ
 * MXCSR combination, first masked, then unmasked, then replayed with the
 * raised exception flags fed back in to verify flag accumulation.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs (raised to at least 192;
 *                          144 standard variations exist).
 * @param   papszNameFmts   Filename format strings for the output file.
 */
static RTEXITCODE SseConvertXmmR32I32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) } },
        { { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) } }
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR32I32[iFn].pfnNative ? g_aSseConvertXmmR32I32[iFn].pfnNative : g_aSseConvertXmmR32I32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR32I32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first; the special values come at the end. */
            TestData.InVal.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
            TestData.InVal.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];

            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[3]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Retry late non-normal random inputs to guarantee a minimum
                   number of all-normal input vectors in the data set. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
            for (uint8_t iFz = 0; iFz < 2; iFz++)
            {
                /* Pass 1: all exceptions masked. */
                uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                  | (iRounding << X86_MXCSR_RC_SHIFT)
                                  | (iDaz ? X86_MXCSR_DAZ : 0)
                                  | (iFz ? X86_MXCSR_FZ : 0)
                                  | X86_MXCSR_XCPT_MASK;
                uint32_t uMxCsrOutM = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                TestData.fMxcsrIn = uMxCsrIn;
                TestData.fMxcsrOut = uMxCsrOutM;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Pass 2: all exceptions unmasked. */
                uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                uint32_t uMxCsrOutU = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                TestData.fMxcsrIn = uMxCsrIn;
                TestData.fMxcsrOut = uMxCsrOutU;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                if (fXcpt)
                {
                    /* Replay with the raised exception flags already set on input. */
                    uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                    uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                    TestData.fMxcsrIn = uMxCsrIn;
                    TestData.fMxcsrOut = uMxCsrOut1;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                    if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                    {
                        /* New flags popped up: mask the accumulated set and go again. */
                        fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                        uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOut2;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }
                    /* If several exceptions fired, also try each one unmasked
                       individually while the others stay masked. */
                    if (!RT_IS_POWER_OF_TWO(fXcpt))
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            if (fUnmasked & fXcpt)
                            {
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                TestData.fMxcsrIn = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut3;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
8477#endif
8478
8479static void SseConvertXmmR32I32Test(void)
8480{
8481 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
8482 {
8483 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32I32[iFn]))
8484 continue;
8485
8486 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR32I32[iFn].paTests;
8487 uint32_t const cTests = g_aSseConvertXmmR32I32[iFn].cTests;
8488 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR32I32[iFn].pfn;
8489 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32I32[iFn]);
8490 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8491 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8492 {
8493 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8494 {
8495 X86XMMREG Res; RT_ZERO(Res);
8496
8497 uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
8498 if ( fMxCsr != paTests[iTest].fMxcsrOut
8499 || Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
8500 || Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
8501 || Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
8502 || Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
8503 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s \n"
8504 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8505 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8506 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8507 FormatR32(&paTests[iTest].InVal.ar32[0]), FormatR32(&paTests[iTest].InVal.ar32[1]),
8508 FormatR32(&paTests[iTest].InVal.ar32[2]), FormatR32(&paTests[iTest].InVal.ar32[3]),
8509 iVar ? " " : "", fMxCsr,
8510 Res.ai32[0], Res.ai32[1],
8511 Res.ai32[2], Res.ai32[3],
8512 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8513 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8514 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8515 MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
8516 ( Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
8517 || Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
8518 || Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
8519 || Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
8520 ? " - val" : "",
8521 FormatMxcsr(paTests[iTest].fMxcsrIn));
8522 }
8523 }
8524
8525 FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR32I32[iFn]);
8526 }
8527}
8528
8529
8530/*
8531 * Convert SSE operations converting signed double-words to double-precision floating point values.
8532 */
/** Workers converting two signed double-words to double-precision values. */
static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R64[] =
{
    ENTRY_BIN(cvtdq2pd_u128)
};
8537
8538#ifdef TSTIEMAIMPL_WITH_GENERATOR
8539DUMP_ALL_FN(SseConvertXmmI32R64, g_aSseConvertXmmI32R64)
/**
 * Generates binary reference data for cvtdq2pd (signed double-words to
 * double-precision) conversion.
 *
 * Each random/special input vector is driven through every rounding/DAZ/FZ
 * MXCSR combination, first masked, then unmasked, then replayed with the
 * raised exception flags fed back in to verify flag accumulation.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output file trouble.
 * @param   cTests          Number of random inputs (raised to at least 192;
 *                          144 standard variations exist).
 * @param   papszNameFmts   Filename format strings for the output file.
 */
static RTEXITCODE SseConvertXmmI32R64Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000 /* same value as INT32_MIN */
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
    {
        /* Prefer the native worker for producing the reference results. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R64[iFn].pfnNative ? g_aSseConvertXmmI32R64[iFn].pfnNative : g_aSseConvertXmmI32R64[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmI32R64[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first; the special values come at the end. */
            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
            for (uint8_t iFz = 0; iFz < 2; iFz++)
            {
                /* Pass 1: all exceptions masked. */
                uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                  | (iRounding << X86_MXCSR_RC_SHIFT)
                                  | (iDaz ? X86_MXCSR_DAZ : 0)
                                  | (iFz ? X86_MXCSR_FZ : 0)
                                  | X86_MXCSR_XCPT_MASK;
                uint32_t uMxCsrOutM = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                TestData.fMxcsrIn = uMxCsrIn;
                TestData.fMxcsrOut = uMxCsrOutM;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                /* Pass 2: all exceptions unmasked. */
                uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                uint32_t uMxCsrOutU = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                TestData.fMxcsrIn = uMxCsrIn;
                TestData.fMxcsrOut = uMxCsrOutU;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                if (fXcpt)
                {
                    /* Replay with the raised exception flags already set on input. */
                    uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                    uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                    TestData.fMxcsrIn = uMxCsrIn;
                    TestData.fMxcsrOut = uMxCsrOut1;
                    GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                    if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                    {
                        /* New flags popped up: mask the accumulated set and go again. */
                        fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                        uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOut2;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }
                    /* If several exceptions fired, also try each one unmasked
                       individually while the others stay masked. */
                    if (!RT_IS_POWER_OF_TWO(fXcpt))
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            if (fUnmasked & fXcpt)
                            {
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                TestData.fMxcsrIn = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut3;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                }
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
8628#endif
8629
8630static void SseConvertXmmI32R64Test(void)
8631{
8632 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
8633 {
8634 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R64[iFn]))
8635 continue;
8636
8637 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R64[iFn].paTests;
8638 uint32_t const cTests = g_aSseConvertXmmI32R64[iFn].cTests;
8639 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R64[iFn].pfn;
8640 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R64[iFn]);
8641 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8642 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8643 {
8644 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8645 {
8646 X86XMMREG Res; RT_ZERO(Res);
8647
8648 uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
8649 if ( fMxCsr != paTests[iTest].fMxcsrOut
8650 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
8651 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8652 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
8653 "%s -> mxcsr=%#08x %s'%s\n"
8654 "%s expected %#08x %s'%s%s%s (%s)\n",
8655 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8656 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8657 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
8658 iVar ? " " : "", fMxCsr,
8659 FormatR64(&Res.ar64[0]), FormatR64(&Res.ar64[1]),
8660 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8661 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8662 MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
8663 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[0], &paTests[iTest].OutVal.ar64[0])
8664 || !RTFLOAT64U_ARE_IDENTICAL(&Res.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8665 ? " - val" : "",
8666 FormatMxcsr(paTests[iTest].fMxcsrIn));
8667 }
8668 }
8669
8670 FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmI32R64[iFn]);
8671 }
8672}
8673
8674
8675/*
8676 * Convert SSE operations converting signed double-words to double-precision floating point values.
8677 */
8678static SSE_CONVERT_XMM_T g_aSseConvertXmmR64I32[] =
8679{
8680 ENTRY_BIN(cvtpd2dq_u128),
8681 ENTRY_BIN(cvttpd2dq_u128)
8682};
8683
8684#ifdef TSTIEMAIMPL_WITH_GENERATOR
8685DUMP_ALL_FN(SseConvertXmmR64I32, g_aSseConvertXmmR64I32)
/**
 * Generates binary test data for the cvtpd2dq & cvttpd2dq workers
 * (packed double-precision to signed double-word conversion).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the output file
 *          cannot be opened or closed.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings, see GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseConvertXmmR64I32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked inputs appended after the random ones. */
    static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
        { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
        { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
        { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
        /** @todo More specials. */
    };

    /* Require a minimum share of inputs where both doubles are normal values. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
    {
        /* Prefer the native worker when present. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR64I32[iFn].pfnNative ? g_aSseConvertXmmR64I32[iFn].pfnNative : g_aSseConvertXmmR64I32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR64I32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Not enough room left to reach the normal-pair quota; redo this iteration. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all MXCSR exceptions masked. */
                        uint32_t uMxCsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutM = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        uMxCsrIn = uMxCsrIn & ~X86_MXCSR_XCPT_MASK;
                        uint32_t uMxCsrOutU = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                        TestData.fMxcsrIn  = uMxCsrIn;
                        TestData.fMxcsrOut = uMxCsrOutU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (uMxCsrOutM | uMxCsrOutU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: feed the raised exception flags back in as already-set status. */
                            uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t uMxCsrOut1 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                            TestData.fMxcsrIn  = uMxCsrIn;
                            TestData.fMxcsrOut = uMxCsrOut1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            if (((uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* New flags popped up: mask the accumulated set and rerun. */
                                fXcpt |= uMxCsrOut1 & X86_MXCSR_XCPT_FLAGS;
                                uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t uMxCsrOut2 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                TestData.fMxcsrIn  = uMxCsrIn;
                                TestData.fMxcsrOut = uMxCsrOut2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Several exceptions raised: try each one unmasked on its own. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        uMxCsrIn = (uMxCsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t uMxCsrOut3 = pfn(uMxCsrIn, &TestData.OutVal, &TestData.OutVal, &TestData.InVal);
                                        TestData.fMxcsrIn  = uMxCsrIn;
                                        TestData.fMxcsrOut = uMxCsrOut3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
8781#endif
8782
/**
 * Replays the recorded test data for the packed r64 -> i32 conversion
 * workers (cvtpd2dq & cvttpd2dq), comparing MXCSR and all four dwords.
 */
static void SseConvertXmmR64I32Test(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64I32[iFn]))
            continue;

        SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR64I32[iFn].paTests;
        uint32_t const cTests = g_aSseConvertXmmR64I32[iFn].cTests;
        PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR64I32[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64I32[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                X86XMMREG Res; RT_ZERO(Res);

                /* Worker takes destination twice (result and second source alias). */
                uint32_t fMxCsr = pfn(paTests[iTest].fMxcsrIn, &Res, &Res, &paTests[iTest].InVal);
                if (   fMxCsr != paTests[iTest].fMxcsrOut
                    || Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
                    || Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
                    || Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
                    || Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
                                          "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
                                          "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
                                 iVar ? " " : "", fMxCsr,
                                 Res.ai32[0], Res.ai32[1],
                                 Res.ai32[2], Res.ai32[3],
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
                                 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
                                 MxcsrDiff(fMxCsr, paTests[iTest].fMxcsrOut),
                                 (   Res.ai32[0] != paTests[iTest].OutVal.ai32[0]
                                  || Res.ai32[1] != paTests[iTest].OutVal.ai32[1]
                                  || Res.ai32[2] != paTests[iTest].OutVal.ai32[2]
                                  || Res.ai32[3] != paTests[iTest].OutVal.ai32[3])
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn));
            }
        }

        FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR64I32[iFn]);
    }
}
8831
8832
8833/*
8834 * Convert SSE operations converting double-precision floating point values to signed double-word values.
8835 */
8836TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_XMM_T, SSE_CONVERT_MM_XMM_TEST_T, PFNIEMAIMPLMXCSRU64U128);
8837
8838static SSE_CONVERT_MM_XMM_T g_aSseConvertMmXmm[] =
8839{
8840 ENTRY_BIN(cvtpd2pi_u128),
8841 ENTRY_BIN(cvttpd2pi_u128)
8842};
8843
8844#ifdef TSTIEMAIMPL_WITH_GENERATOR
8845DUMP_ALL_FN(SseConvertMmXmm, g_aSseConvertMmXmm)
/**
 * Generates binary test data for the cvtpd2pi & cvttpd2pi workers
 * (packed double-precision to MMX signed double-word conversion).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the output file
 *          cannot be opened or closed.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings, see GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseConvertMmXmmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked inputs appended after the random ones. */
    static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
        { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
        { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
        { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
        /** @todo More specials. */
    };

    /* Require a minimum share of inputs where both doubles are normal values. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
    {
        /* Prefer the native worker when present. */
        PFNIEMAIMPLMXCSRU64U128 const pfn = g_aSseConvertMmXmm[iFn].pfnNative ? g_aSseConvertMmXmm[iFn].pfnNative : g_aSseConvertMmXmm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertMmXmm[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_MM_XMM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Not enough room left to reach the normal-pair quota; redo this iteration. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all MXCSR exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint64_t u64ResM;
                        uint32_t fMxcsrM = pfn(fMxcsrIn, &u64ResM, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.OutVal.u  = u64ResM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint64_t u64ResU;
                        uint32_t fMxcsrU = pfn(fMxcsrIn, &u64ResU, &TestData.InVal);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.OutVal.u  = u64ResU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: feed the raised exception flags back in as already-set status. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint64_t u64Res1;
                            uint32_t fMxcsr1 = pfn(fMxcsrIn, &u64Res1, &TestData.InVal);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.OutVal.u  = u64Res1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* New flags popped up: mask the accumulated set and rerun. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint64_t u64Res2;
                                uint32_t fMxcsr2 = pfn(fMxcsrIn, &u64Res2, &TestData.InVal);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.OutVal.u  = u64Res2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Several exceptions raised: try each one unmasked on its own. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint64_t u64Res3;
                                        uint32_t fMxcsr3 = pfn(fMxcsrIn, &u64Res3, &TestData.InVal);
                                        TestData.fMxcsrIn  = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.OutVal.u  = u64Res3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
8951#endif
8952
8953static void SseConvertMmXmmTest(void)
8954{
8955 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8956 {
8957 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmXmm[iFn]))
8958 continue;
8959
8960 SSE_CONVERT_MM_XMM_TEST_T const * const paTests = g_aSseConvertMmXmm[iFn].paTests;
8961 uint32_t const cTests = g_aSseConvertMmXmm[iFn].cTests;
8962 PFNIEMAIMPLMXCSRU64U128 pfn = g_aSseConvertMmXmm[iFn].pfn;
8963 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmXmm[iFn]);
8964 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8965 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8966 {
8967 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8968 {
8969 RTUINT64U ValOut;
8970 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut.u, &paTests[iTest].InVal);
8971 if ( fMxcsr != paTests[iTest].fMxcsrOut
8972 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8973 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8974 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s\n"
8975 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8976 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8977 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8978 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8979 iVar ? " " : "", fMxcsr, ValOut.ai32[0], ValOut.ai32[1],
8980 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8981 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8982 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8983 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8984 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8985 ? " - val" : "",
8986 FormatMxcsr(paTests[iTest].fMxcsrIn));
8987 }
8988 }
8989
8990 FREE_DECOMPRESSED_TESTS(g_aSseConvertMmXmm[iFn]);
8991 }
8992}
8993
8994
8995/*
8996 * Convert SSE operations converting signed double-word values to double precision floating-point values (probably only cvtpi2pd).
8997 */
8998TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R64_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8999
9000static SSE_CONVERT_XMM_R64_MM_T g_aSseConvertXmmR64Mm[] =
9001{
9002 ENTRY_BIN(cvtpi2pd_u128)
9003};
9004
9005#ifdef TSTIEMAIMPL_WITH_GENERATOR
9006DUMP_ALL_FN(SseConvertXmmR64Mm, g_aSseConvertXmmR64Mm)
/**
 * Generates binary test data for the cvtpi2pd worker (two signed
 * double-words to two double-precision values).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the output file
 *          cannot be opened or closed.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings, see GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseConvertXmmR64MmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Boundary inputs appended after the random ones. */
    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
    {
        /* Prefer the native worker when present. */
        PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR64Mm[iFn].pfnNative ? g_aSseConvertXmmR64Mm[iFn].pfnNative : g_aSseConvertXmmR64Mm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR64Mm[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all MXCSR exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: feed the raised exception flags back in as already-set status. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* New flags popped up: mask the accumulated set and rerun. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Several exceptions raised: try each one unmasked on its own. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                                        TestData.fMxcsrIn  = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
9089#endif
9090
/**
 * Replays the recorded test data for the cvtpi2pd worker, comparing MXCSR
 * and both result doubles bit-for-bit.
 */
static void SseConvertXmmR64MmTest(void)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
    {
        if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64Mm[iFn]))
            continue;

        SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR64Mm[iFn].paTests;
        uint32_t const cTests = g_aSseConvertXmmR64Mm[iFn].cTests;
        PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR64Mm[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64Mm[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                X86XMMREG ValOut;
                uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, paTests[iTest].InVal.u);
                if (   fMxcsr != paTests[iTest].fMxcsrOut
                    || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
                    || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
                                          "%s -> mxcsr=%#08x %s'%s\n"
                                          "%s expected %#08x %s'%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
                                 iVar ? " " : "", fMxcsr,
                                 FormatR64(&ValOut.ar64[0]), FormatR64(&ValOut.ar64[1]),
                                 iVar ? " " : "", paTests[iTest].fMxcsrOut,
                                 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
                                 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
                                 (   !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
                                  || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn));
            }
        }

        FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR64Mm[iFn]);
    }
}
9132
9133
9134/*
9135 * Convert SSE operations converting signed double-word values to double precision floating-point values (probably only cvtpi2pd).
9136 */
9137TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R32_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
9138
9139static SSE_CONVERT_XMM_R32_MM_T g_aSseConvertXmmR32Mm[] =
9140{
9141 ENTRY_BIN(cvtpi2ps_u128)
9142};
9143
9144#ifdef TSTIEMAIMPL_WITH_GENERATOR
9145DUMP_ALL_FN(SseConvertXmmR32Mm, g_aSseConvertXmmR32Mm)
/**
 * Generates binary test data for the cvtpi2ps worker (two signed
 * double-words to two single-precision values).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the output file
 *          cannot be opened or closed.
 * @param   cTests          Number of random inputs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings, see GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseConvertXmmR32MmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Boundary inputs appended after the random ones. */
    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
    {
        /* Prefer the native worker when present. */
        PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR32Mm[iFn].pfnNative ? g_aSseConvertXmmR32Mm[iFn].pfnNative : g_aSseConvertXmmR32Mm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR32Mm[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all MXCSR exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: feed the raised exception flags back in as already-set status. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* New flags popped up: mask the accumulated set and rerun. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Several exceptions raised: try each one unmasked on its own. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3 = pfn(fMxcsrIn, &TestData.OutVal, TestData.InVal.u);
                                        TestData.fMxcsrIn  = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
9228#endif
9229
9230static void SseConvertXmmR32MmTest(void)
9231{
9232 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
9233 {
9234 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32Mm[iFn]))
9235 continue;
9236
9237 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR32Mm[iFn].paTests;
9238 uint32_t const cTests = g_aSseConvertXmmR32Mm[iFn].cTests;
9239 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR32Mm[iFn].pfn;
9240 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32Mm[iFn]);
9241 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9242 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9243 {
9244 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9245 {
9246 X86XMMREG ValOut;
9247 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut, paTests[iTest].InVal.u);
9248 if ( fMxcsr != paTests[iTest].fMxcsrOut
9249 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
9250 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
9251 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
9252 "%s -> mxcsr=%#08x %s'%s\n"
9253 "%s expected %#08x %s'%s%s%s (%s)\n",
9254 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
9255 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
9256 iVar ? " " : "", fMxcsr,
9257 FormatR32(&ValOut.ar32[0]), FormatR32(&ValOut.ar32[1]),
9258 iVar ? " " : "", paTests[iTest].fMxcsrOut,
9259 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
9260 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
9261 ( !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
9262 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
9263 ? " - val" : "",
9264 FormatMxcsr(paTests[iTest].fMxcsrIn));
9265 }
9266 }
9267
9268 FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR32Mm[iFn]);
9269 }
9270}
9271
9272
9273/*
9274 * Convert SSE operations converting single-precision floating point values to signed double-word values.
9275 */
9276TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_I32_XMM_R32_T, SSE_CONVERT_MM_R32_TEST_T, PFNIEMAIMPLMXCSRU64U64);
9277
9278static SSE_CONVERT_MM_I32_XMM_R32_T g_aSseConvertMmI32XmmR32[] =
9279{
9280 ENTRY_BIN(cvtps2pi_u128),
9281 ENTRY_BIN(cvttps2pi_u128)
9282};
9283
9284#ifdef TSTIEMAIMPL_WITH_GENERATOR
9285DUMP_ALL_FN(SseConvertMmI32XmmR32, g_aSseConvertMmI32XmmR32)
/**
 * Generates binary test data for the cvtps2pi & cvttps2pi workers (two
 * packed singles to two MMX signed double-words).
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if the output file
 *          cannot be opened or closed.
 * @param   cTests          Number of random input pairs (raised to at least 192).
 * @param   papszNameFmts   Output filename format strings, see GENERATE_BINARY_OPEN.
 */
static RTEXITCODE SseConvertMmI32XmmR32Generate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    /* Hand-picked inputs appended after the random ones. */
    static struct { RTFLOAT32U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) } },
        { { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) } }
        /** @todo More specials. */
    };

    /* Require a minimum share of inputs where both singles are normal values. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
    {
        /* Prefer the native worker when present. */
        PFNIEMAIMPLMXCSRU64U64 const pfn = g_aSseConvertMmI32XmmR32[iFn].pfnNative ? g_aSseConvertMmI32XmmR32[iFn].pfnNative : g_aSseConvertMmI32XmmR32[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertMmI32XmmR32[iFn]), RTEXITCODE_FAILURE);

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_MM_R32_TEST_T TestData; RT_ZERO(TestData);

            TestData.ar32InVal[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.ar32InVal[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];

            if (   RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Not enough room left to reach the normal-pair quota; redo this iteration. */
                iTest -= 1;
                continue;
            }

            /* Pack the two single-precision inputs into the uint64_t the worker takes. */
            RTFLOAT64U TestVal;
            TestVal.au32[0] = TestData.ar32InVal[0].u;
            TestVal.au32[1] = TestData.ar32InVal[1].u;

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all MXCSR exceptions masked. */
                        uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz ? X86_MXCSR_FZ : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint64_t u64ResM;
                        uint32_t fMxcsrM = pfn(fMxcsrIn, &u64ResM, TestVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.OutVal.u  = u64ResM;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint64_t u64ResU;
                        uint32_t fMxcsrU = pfn(fMxcsrIn, &u64ResU, TestVal.u);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.OutVal.u  = u64ResU;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: feed the raised exception flags back in as already-set status. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint64_t u64Res1;
                            uint32_t fMxcsr1 = pfn(fMxcsrIn, &u64Res1, TestVal.u);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.OutVal.u  = u64Res1;
                            GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));

                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                /* New flags popped up: mask the accumulated set and rerun. */
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint64_t u64Res2;
                                uint32_t fMxcsr2 = pfn(fMxcsrIn, &u64Res2, TestVal.u);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.OutVal.u  = u64Res2;
                                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                            }
                            /* Several exceptions raised: try each one unmasked on its own. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint64_t u64Res3;
                                        uint32_t fMxcsr3 = pfn(fMxcsrIn, &u64Res3, TestVal.u);
                                        TestData.fMxcsrIn  = fMxcsrIn;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.OutVal.u  = u64Res3;
                                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
9395#endif
9396
9397static void SseConvertMmI32XmmR32Test(void)
9398{
9399 X86FXSTATE State;
9400 RT_ZERO(State);
9401
9402 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
9403 {
9404 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmI32XmmR32[iFn]))
9405 continue;
9406
9407 SSE_CONVERT_MM_R32_TEST_T const * const paTests = g_aSseConvertMmI32XmmR32[iFn].paTests;
9408 uint32_t const cTests = g_aSseConvertMmI32XmmR32[iFn].cTests;
9409 PFNIEMAIMPLMXCSRU64U64 pfn = g_aSseConvertMmI32XmmR32[iFn].pfn;
9410 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmI32XmmR32[iFn]);
9411 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9412 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9413 {
9414 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9415 {
9416 RTUINT64U ValOut;
9417 RTUINT64U ValIn;
9418
9419 ValIn.au32[0] = paTests[iTest].ar32InVal[0].u;
9420 ValIn.au32[1] = paTests[iTest].ar32InVal[1].u;
9421
9422 uint32_t fMxcsr = pfn(paTests[iTest].fMxcsrIn, &ValOut.u, ValIn.u);
9423 if ( fMxcsr != paTests[iTest].fMxcsrOut
9424 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9425 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9426 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
9427 "%s -> mxcsr=%#08x %RI32'%RI32\n"
9428 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
9429 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
9430 FormatR32(&paTests[iTest].ar32InVal[0]), FormatR32(&paTests[iTest].ar32InVal[1]),
9431 iVar ? " " : "", fMxcsr,
9432 ValOut.ai32[0], ValOut.ai32[1],
9433 iVar ? " " : "", paTests[iTest].fMxcsrOut,
9434 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
9435 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
9436 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9437 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9438 ? " - val" : "",
9439 FormatMxcsr(paTests[iTest].fMxcsrIn));
9440 }
9441 }
9442
9443 FREE_DECOMPRESSED_TESTS(g_aSseConvertMmI32XmmR32[iFn]);
9444 }
9445}
9446
9447
9448/*
9449 * SSE 4.2 pcmpxstrx instructions.
9450 */
9451TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRI_T, SSE_PCMPISTRI_TEST_T, PFNIEMAIMPLPCMPISTRIU128IMM8);
9452
9453static SSE_PCMPISTRI_T g_aSsePcmpistri[] =
9454{
9455 ENTRY_BIN_SSE_OPT(pcmpistri_u128),
9456};
9457
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseComparePcmpistri, g_aSsePcmpistri)
/**
 * Generates binary test data for the pcmpistri worker(s).
 *
 * For each random (or special) pair of 128-bit source values the worker is
 * invoked with every immediate 0..255 and one record is written per call;
 * the immediate sweep is then repeated with both sources identical.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output trouble.
 * @param   cTests          Number of input variations to generate (clamped up to 192).
 * @param   papszNameFmts   Output filename format strings, one per EFLAGS behaviour flavour.
 */
static RTEXITCODE SseComparePcmpistriGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
    {
        /* Prefer the native worker for producing reference results when available. */
        PFNIEMAIMPLPCMPISTRIU128IMM8 const pfn = g_aSsePcmpistri[iFn].pfnNative ? g_aSsePcmpistri[iFn].pfnNative : g_aSsePcmpistri[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpistri[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPISTRI_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            uint32_t const fEFlagsIn = RandEFlags();
            for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
            {
                uint32_t fEFlagsOut = fEFlagsIn;
                TestData.u32EcxOut = pfn(&fEFlagsOut, &TestData.InVal1.uXmm, &TestData.InVal2.uXmm, (uint8_t)u16Imm);
                TestData.fEFlagsIn = fEFlagsIn;
                TestData.fEFlagsOut = fEFlagsOut;
                TestData.bImm = (uint8_t)u16Imm;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
            }

            /* Repeat the test with the input value being the same. */
            TestData.InVal2.uXmm = TestData.InVal1.uXmm; /* This assignment was missing: without it the loop below
                                                            merely duplicated the records written above.  Matches the
                                                            pcmpistrm/pcmpestri/pcmpestrm generators. */
            for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
            {
                uint32_t fEFlagsOut = fEFlagsIn;
                TestData.u32EcxOut = pfn(&fEFlagsOut, &TestData.InVal1.uXmm, &TestData.InVal2.uXmm, (uint8_t)u16Imm);
                TestData.fEFlagsIn = fEFlagsIn;
                TestData.fEFlagsOut = fEFlagsOut;
                TestData.bImm = (uint8_t)u16Imm;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9512
9513static void SseComparePcmpistriTest(void)
9514{
9515 X86FXSTATE State;
9516 RT_ZERO(State);
9517
9518 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
9519 {
9520 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistri[iFn]))
9521 continue;
9522
9523 SSE_PCMPISTRI_TEST_T const * const paTests = g_aSsePcmpistri[iFn].paTests;
9524 uint32_t const cTests = g_aSsePcmpistri[iFn].cTests;
9525 PFNIEMAIMPLPCMPISTRIU128IMM8 pfn = g_aSsePcmpistri[iFn].pfn;
9526 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistri[iFn]);
9527 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9528 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9529 {
9530 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9531 {
9532 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9533 uint32_t u32EcxOut = pfn(&fEFlags, &paTests[iTest].InVal1.uXmm, &paTests[iTest].InVal2.uXmm, paTests[iTest].bImm);
9534 if ( fEFlags != paTests[iTest].fEFlagsOut
9535 || u32EcxOut != paTests[iTest].u32EcxOut)
9536 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9537 "%s -> efl=%#08x %RU32\n"
9538 "%s expected %#08x %RU32%s%s\n",
9539 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9540 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9541 iVar ? " " : "", fEFlags, u32EcxOut,
9542 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9543 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9544 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9545 }
9546 }
9547
9548 FREE_DECOMPRESSED_TESTS(g_aSsePcmpistri[iFn]);
9549 }
9550}
9551
9552
/** Subtest type binding SSE_PCMPISTRM_TEST_T records to a PFNIEMAIMPLPCMPISTRMU128IMM8 worker. */
TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRM_T, SSE_PCMPISTRM_TEST_T, PFNIEMAIMPLPCMPISTRMU128IMM8);

/** The pcmpistrm subtests, registered with binary test data via ENTRY_BIN_SSE_OPT. */
static SSE_PCMPISTRM_T g_aSsePcmpistrm[] =
{
    ENTRY_BIN_SSE_OPT(pcmpistrm_u128),
};
9559
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseComparePcmpistrm, g_aSsePcmpistrm)
/**
 * Generates binary test data for the pcmpistrm worker(s).
 *
 * For each random (or special) pair of 128-bit source values the worker is
 * invoked with every immediate 0..255 and one record written per call; the
 * immediate sweep is then repeated with both sources identical.  The record
 * write order defines the binary file layout, so it must not be changed.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output trouble.
 * @param   cTests          Number of input variations to generate (clamped up to 192).
 * @param   papszNameFmts   Output filename format strings, one per EFLAGS behaviour flavour.
 */
static RTEXITCODE SseComparePcmpistrmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
    {
        /* Prefer the native worker for producing reference results when available. */
        PFNIEMAIMPLPCMPISTRMU128IMM8 const pfn = g_aSsePcmpistrm[iFn].pfnNative ? g_aSsePcmpistrm[iFn].pfnNative : g_aSsePcmpistrm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpistrm[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPISTRM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the hardcoded specials at the tail. */
            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            /* The worker takes both sources packed in an IEMPCMPISTRXSRC. */
            IEMPCMPISTRXSRC TestVal;
            TestVal.uSrc1 = TestData.InVal1.uXmm;
            TestVal.uSrc2 = TestData.InVal2.uXmm;

            uint32_t const fEFlagsIn = RandEFlags();
            for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
            {
                uint32_t fEFlagsOut = fEFlagsIn;
                pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                TestData.fEFlagsIn = fEFlagsIn;
                TestData.fEFlagsOut = fEFlagsOut;
                TestData.bImm = (uint8_t)u16Imm;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
            }

            /* Repeat the test with the input value being the same. */
            TestData.InVal2.uXmm = TestData.InVal1.uXmm;
            TestVal.uSrc1 = TestData.InVal1.uXmm;
            TestVal.uSrc2 = TestData.InVal2.uXmm;

            for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
            {
                uint32_t fEFlagsOut = fEFlagsIn;
                pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                TestData.fEFlagsIn = fEFlagsIn;
                TestData.fEFlagsOut = fEFlagsOut;
                TestData.bImm = (uint8_t)u16Imm;
                GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
            }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9622
9623static void SseComparePcmpistrmTest(void)
9624{
9625 X86FXSTATE State;
9626 RT_ZERO(State);
9627
9628 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9629 {
9630 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistrm[iFn]))
9631 continue;
9632
9633 SSE_PCMPISTRM_TEST_T const * const paTests = g_aSsePcmpistrm[iFn].paTests;
9634 uint32_t const cTests = g_aSsePcmpistrm[iFn].cTests;
9635 PFNIEMAIMPLPCMPISTRMU128IMM8 pfn = g_aSsePcmpistrm[iFn].pfn;
9636 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistrm[iFn]);
9637 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9638 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9639 {
9640 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9641 {
9642 IEMPCMPISTRXSRC TestVal;
9643 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9644 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9645
9646 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9647 RTUINT128U OutVal;
9648 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9649 if ( fEFlags != paTests[iTest].fEFlagsOut
9650 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9651 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9652 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9653 "%s -> efl=%#08x %s\n"
9654 "%s expected %#08x %s%s%s\n",
9655 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9656 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9657 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9658 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9659 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9660 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9661 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9662 }
9663 }
9664
9665 FREE_DECOMPRESSED_TESTS(g_aSsePcmpistrm[iFn]);
9666 }
9667}
9668
9669
/** Subtest type binding SSE_PCMPESTRI_TEST_T records to a PFNIEMAIMPLPCMPESTRIU128IMM8 worker. */
TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRI_T, SSE_PCMPESTRI_TEST_T, PFNIEMAIMPLPCMPESTRIU128IMM8);

/** The pcmpestri subtests, registered with binary test data via ENTRY_BIN_SSE_OPT. */
static SSE_PCMPESTRI_T g_aSsePcmpestri[] =
{
    ENTRY_BIN_SSE_OPT(pcmpestri_u128),
};
9676
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseComparePcmpestri, g_aSsePcmpestri)
/**
 * Generates binary test data for the pcmpestri worker(s).
 *
 * Like the implicit-length variants, but additionally sweeps explicit length
 * registers RAX/RDX over { -20, 0 } each; for every combination the worker is
 * invoked with every immediate 0..255 and one record written per call.  The
 * record write order defines the binary file layout, so it must not change.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output trouble.
 * @param   cTests          Number of input variations to generate (clamped up to 192).
 * @param   papszNameFmts   Output filename format strings, one per EFLAGS behaviour flavour.
 */
static RTEXITCODE SseComparePcmpestriGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
    {
        /* Prefer the native worker for producing reference results when available. */
        PFNIEMAIMPLPCMPESTRIU128IMM8 const pfn = g_aSsePcmpestri[iFn].pfnNative ? g_aSsePcmpestri[iFn].pfnNative : g_aSsePcmpestri[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpestri[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPESTRI_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the hardcoded specials at the tail. */
            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
                for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
                {
                    TestData.u64Rax = (uint64_t)i64Rax;
                    TestData.u64Rdx = (uint64_t)i64Rdx;

                    /* The worker takes sources and explicit lengths packed in an IEMPCMPESTRXSRC. */
                    IEMPCMPESTRXSRC TestVal;
                    TestVal.uSrc1 = TestData.InVal1.uXmm;
                    TestVal.uSrc2 = TestData.InVal2.uXmm;
                    TestVal.u64Rax = TestData.u64Rax;
                    TestVal.u64Rdx = TestData.u64Rdx;

                    uint32_t const fEFlagsIn = RandEFlags();
                    for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                    {
                        uint32_t fEFlagsOut = fEFlagsIn;
                        pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                        TestData.fEFlagsIn = fEFlagsIn;
                        TestData.fEFlagsOut = fEFlagsOut;
                        TestData.bImm = (uint8_t)u16Imm;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }

                    /* Repeat the test with the input value being the same. */
                    /* NOTE(review): this overwrite of InVal2 is never undone, so all
                       i64Rax/i64Rdx iterations after the first one run with identical
                       sources in both loops - possibly intentional, verify. */
                    TestData.InVal2.uXmm = TestData.InVal1.uXmm;
                    TestVal.uSrc1 = TestData.InVal1.uXmm;
                    TestVal.uSrc2 = TestData.InVal2.uXmm;

                    for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                    {
                        uint32_t fEFlagsOut = fEFlagsIn;
                        pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                        TestData.fEFlagsIn = fEFlagsIn;
                        TestData.fEFlagsOut = fEFlagsOut;
                        TestData.bImm = (uint8_t)u16Imm;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9748
9749static void SseComparePcmpestriTest(void)
9750{
9751 X86FXSTATE State;
9752 RT_ZERO(State);
9753
9754 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9755 {
9756 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestri[iFn]))
9757 continue;
9758
9759 SSE_PCMPESTRI_TEST_T const * const paTests = g_aSsePcmpestri[iFn].paTests;
9760 uint32_t const cTests = g_aSsePcmpestri[iFn].cTests;
9761 PFNIEMAIMPLPCMPESTRIU128IMM8 pfn = g_aSsePcmpestri[iFn].pfn;
9762 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestri[iFn]);
9763 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9764 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9765 {
9766 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9767 {
9768 IEMPCMPESTRXSRC TestVal;
9769 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9770 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9771 TestVal.u64Rax = paTests[iTest].u64Rax;
9772 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9773
9774 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9775 uint32_t u32EcxOut = 0;
9776 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9777 if ( fEFlags != paTests[iTest].fEFlagsOut
9778 || u32EcxOut != paTests[iTest].u32EcxOut)
9779 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9780 "%s -> efl=%#08x %RU32\n"
9781 "%s expected %#08x %RU32%s%s\n",
9782 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9783 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9784 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9785 paTests[iTest].bImm,
9786 iVar ? " " : "", fEFlags, u32EcxOut,
9787 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9788 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9789 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9790 }
9791 }
9792
9793 FREE_DECOMPRESSED_TESTS(g_aSsePcmpestri[iFn]);
9794 }
9795}
9796
9797
/** Subtest type binding SSE_PCMPESTRM_TEST_T records to a PFNIEMAIMPLPCMPESTRMU128IMM8 worker. */
TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRM_T, SSE_PCMPESTRM_TEST_T, PFNIEMAIMPLPCMPESTRMU128IMM8);

/** The pcmpestrm subtests, registered with binary test data via ENTRY_BIN_SSE_OPT. */
static SSE_PCMPESTRM_T g_aSsePcmpestrm[] =
{
    ENTRY_BIN_SSE_OPT(pcmpestrm_u128),
};
9804
#ifdef TSTIEMAIMPL_WITH_GENERATOR
DUMP_ALL_FN(SseComparePcmpestrm, g_aSsePcmpestrm)
/**
 * Generates binary test data for the pcmpestrm worker(s).
 *
 * Like the implicit-length variants, but additionally sweeps explicit length
 * registers RAX/RDX over { -20, 0 } each; for every combination the worker is
 * invoked with every immediate 0..255 and one record written per call.  The
 * record write order defines the binary file layout, so it must not change.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE on output trouble.
 * @param   cTests          Number of input variations to generate (clamped up to 192).
 * @param   papszNameFmts   Output filename format strings, one per EFLAGS behaviour flavour.
 */
static RTEXITCODE SseComparePcmpestrmGenerate(uint32_t cTests, const char * const *papszNameFmts)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
    {
        { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
    {
        /* Prefer the native worker for producing reference results when available. */
        PFNIEMAIMPLPCMPESTRMU128IMM8 const pfn = g_aSsePcmpestrm[iFn].pfnNative ? g_aSsePcmpestrm[iFn].pfnNative : g_aSsePcmpestrm[iFn].pfn;

        IEMBINARYOUTPUT BinOut;
        AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpestrm[iFn]), RTEXITCODE_FAILURE);

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_PCMPESTRM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the hardcoded specials at the tail. */
            TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
            TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;

            for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
                for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
                {
                    TestData.u64Rax = (uint64_t)i64Rax;
                    TestData.u64Rdx = (uint64_t)i64Rdx;

                    /* The worker takes sources and explicit lengths packed in an IEMPCMPESTRXSRC. */
                    IEMPCMPESTRXSRC TestVal;
                    TestVal.uSrc1 = TestData.InVal1.uXmm;
                    TestVal.uSrc2 = TestData.InVal2.uXmm;
                    TestVal.u64Rax = TestData.u64Rax;
                    TestVal.u64Rdx = TestData.u64Rdx;

                    uint32_t const fEFlagsIn = RandEFlags();
                    for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                    {
                        uint32_t fEFlagsOut = fEFlagsIn;
                        pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                        TestData.fEFlagsIn = fEFlagsIn;
                        TestData.fEFlagsOut = fEFlagsOut;
                        TestData.bImm = (uint8_t)u16Imm;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }

                    /* Repeat the test with the input value being the same. */
                    /* NOTE(review): this overwrite of InVal2 is never undone, so all
                       i64Rax/i64Rdx iterations after the first one run with identical
                       sources in both loops - possibly intentional, verify. */
                    TestData.InVal2.uXmm = TestData.InVal1.uXmm;
                    TestVal.uSrc1 = TestData.InVal1.uXmm;
                    TestVal.uSrc2 = TestData.InVal2.uXmm;

                    for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
                    {
                        uint32_t fEFlagsOut = fEFlagsIn;
                        pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
                        TestData.fEFlagsIn = fEFlagsIn;
                        TestData.fEFlagsOut = fEFlagsOut;
                        TestData.bImm = (uint8_t)u16Imm;
                        GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
                    }
                }
        }
        AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
    }

    return RTEXITCODE_SUCCESS;
}
#endif
9876
9877static void SseComparePcmpestrmTest(void)
9878{
9879 X86FXSTATE State;
9880 RT_ZERO(State);
9881
9882 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9883 {
9884 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestrm[iFn]))
9885 continue;
9886
9887 SSE_PCMPESTRM_TEST_T const * const paTests = g_aSsePcmpestrm[iFn].paTests;
9888 uint32_t const cTests = g_aSsePcmpestrm[iFn].cTests;
9889 PFNIEMAIMPLPCMPESTRMU128IMM8 pfn = g_aSsePcmpestrm[iFn].pfn;
9890 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestrm[iFn]);
9891 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9892 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9893 {
9894 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9895 {
9896 IEMPCMPESTRXSRC TestVal;
9897 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9898 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9899 TestVal.u64Rax = paTests[iTest].u64Rax;
9900 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9901
9902 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9903 RTUINT128U OutVal;
9904 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9905 if ( fEFlags != paTests[iTest].fEFlagsOut
9906 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9907 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9908 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9909 "%s -> efl=%#08x %s\n"
9910 "%s expected %#08x %s%s%s\n",
9911 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9912 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9913 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9914 paTests[iTest].bImm,
9915 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9916 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9917 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9918 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9919 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9920 }
9921 }
9922
9923 FREE_DECOMPRESSED_TESTS(g_aSsePcmpestrm[iFn]);
9924 }
9925}
9926
9927
9928
9929int main(int argc, char **argv)
9930{
9931 int rc = RTR3InitExe(argc, &argv, 0);
9932 if (RT_FAILURE(rc))
9933 return RTMsgInitFailure(rc);
9934
9935 /*
9936 * Determin the host CPU.
9937 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
9938 */
9939#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
9940 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
9941 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
9942 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9943#else
9944 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9945#endif
9946
9947 /*
9948 * Parse arguments.
9949 */
9950 enum { kModeNotSet, kModeTest, kModeGenerate, kModeDump }
9951 enmMode = kModeNotSet;
9952#define CATEGORY_INT RT_BIT_32(0)
9953#define CATEGORY_FPU_LD_ST RT_BIT_32(1)
9954#define CATEGORY_FPU_BINARY_1 RT_BIT_32(2)
9955#define CATEGORY_FPU_BINARY_2 RT_BIT_32(3)
9956#define CATEGORY_FPU_OTHER RT_BIT_32(4)
9957#define CATEGORY_SSE_FP_BINARY RT_BIT_32(5)
9958#define CATEGORY_SSE_FP_OTHER RT_BIT_32(6)
9959#define CATEGORY_SSE_PCMPXSTRX RT_BIT_32(7)
9960 uint32_t fCategories = UINT32_MAX;
9961 bool fCpuData = true;
9962 bool fCommonData = true;
9963 uint32_t const cDefaultTests = 96;
9964 uint32_t cTests = cDefaultTests;
9965
9966 RTGETOPTDEF const s_aOptions[] =
9967 {
9968 // mode:
9969 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
9970 { "--dump", 'G', RTGETOPT_REQ_NOTHING },
9971 { "--test", 't', RTGETOPT_REQ_NOTHING },
9972 { "--benchmark", 'b', RTGETOPT_REQ_NOTHING },
9973 // test selection (both)
9974 { "--all", 'a', RTGETOPT_REQ_NOTHING },
9975 { "--none", 'z', RTGETOPT_REQ_NOTHING },
9976 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
9977 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
9978 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
9979 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
9980 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
9981 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
9982 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
9983 { "--sse-fp-other", 'T', RTGETOPT_REQ_NOTHING },
9984 { "--sse-pcmpxstrx", 'C', RTGETOPT_REQ_NOTHING },
9985 { "--int", 'i', RTGETOPT_REQ_NOTHING },
9986 { "--include", 'I', RTGETOPT_REQ_STRING },
9987 { "--exclude", 'X', RTGETOPT_REQ_STRING },
9988 // generation parameters
9989 { "--common", 'm', RTGETOPT_REQ_NOTHING },
9990 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
9991 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
9992 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
9993 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
9994 { "--quiet-skipping", 'Q', RTGETOPT_REQ_NOTHING },
9995 };
9996
9997 RTGETOPTSTATE State;
9998 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
9999 AssertRCReturn(rc, RTEXITCODE_FAILURE);
10000
10001 RTGETOPTUNION ValueUnion;
10002 while ((rc = RTGetOpt(&State, &ValueUnion)))
10003 {
10004 switch (rc)
10005 {
10006 case 'g':
10007 enmMode = kModeGenerate;
10008 g_cPicoSecBenchmark = 0;
10009 break;
10010 case 'G':
10011 enmMode = kModeDump;
10012 g_cPicoSecBenchmark = 0;
10013 break;
10014 case 't':
10015 enmMode = kModeTest;
10016 g_cPicoSecBenchmark = 0;
10017 break;
10018 case 'b':
10019 enmMode = kModeTest;
10020 g_cPicoSecBenchmark += RT_NS_1SEC / 2 * UINT64_C(1000); /* half a second in pico seconds */
10021 break;
10022
10023 case 'a':
10024 fCpuData = true;
10025 fCommonData = true;
10026 fCategories = UINT32_MAX;
10027 break;
10028 case 'z':
10029 fCpuData = false;
10030 fCommonData = false;
10031 fCategories = 0;
10032 break;
10033
10034 case 'F':
10035 fCategories |= CATEGORY_FPU_LD_ST;
10036 break;
10037 case 'O':
10038 fCategories |= CATEGORY_FPU_OTHER;
10039 break;
10040 case 'B':
10041 fCategories |= CATEGORY_FPU_BINARY_1;
10042 break;
10043 case 'P':
10044 fCategories |= CATEGORY_FPU_BINARY_2;
10045 break;
10046 case 'S':
10047 fCategories |= CATEGORY_SSE_FP_BINARY;
10048 break;
10049 case 'T':
10050 fCategories |= CATEGORY_SSE_FP_OTHER;
10051 break;
10052 case 'C':
10053 fCategories |= CATEGORY_SSE_PCMPXSTRX;
10054 break;
10055 case 'i':
10056 fCategories |= CATEGORY_INT;
10057 break;
10058
10059 case 'I':
10060 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
10061 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
10062 RT_ELEMENTS(g_apszIncludeTestPatterns));
10063 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
10064 break;
10065 case 'X':
10066 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
10067 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
10068 RT_ELEMENTS(g_apszExcludeTestPatterns));
10069 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
10070 break;
10071
10072 case 'm':
10073 fCommonData = true;
10074 break;
10075 case 'c':
10076 fCpuData = true;
10077 break;
10078 case 'n':
10079 cTests = ValueUnion.u32;
10080 break;
10081
10082 case 'q':
10083 g_cVerbosity = 0;
10084 break;
10085 case 'v':
10086 g_cVerbosity++;
10087 break;
10088 case 'Q':
10089 g_fVerboseSkipping = false;
10090 break;
10091
10092 case 'h':
10093 RTPrintf("usage: %Rbn <-g|-t> [options]\n"
10094 "\n"
10095 "Mode:\n"
10096 " -g, --generate\n"
10097 " Generate test data.\n"
10098 " -t, --test\n"
10099 " Execute tests.\n"
10100 " -b, --benchmark\n"
10101 " Execute tests and do 1/2 seconds of benchmarking.\n"
10102 " Repeating the option increases the benchmark duration by 0.5 seconds.\n"
10103 "\n"
10104 "Test selection (both modes):\n"
10105 " -a, --all\n"
10106 " Enable all tests and generated test data. (default)\n"
10107 " -z, --zap, --none\n"
10108 " Disable all tests and test data types.\n"
10109 " -i, --int\n"
10110 " Enable non-FPU tests.\n"
10111 " -F, --fpu-ld-st\n"
10112 " Enable FPU load and store tests.\n"
10113 " -B, --fpu-binary-1\n"
10114 " Enable FPU binary 80-bit FP tests.\n"
10115 " -P, --fpu-binary-2\n"
10116 " Enable FPU binary 64- and 32-bit FP tests.\n"
10117 " -O, --fpu-other\n"
10118 " Enable FPU binary 64- and 32-bit FP tests.\n"
10119 " -S, --sse-fp-binary\n"
10120 " Enable SSE binary 64- and 32-bit FP tests.\n"
10121 " -T, --sse-fp-other\n"
10122 " Enable misc SSE 64- and 32-bit FP tests.\n"
10123 " -C, --sse-pcmpxstrx\n"
10124 " Enable SSE pcmpxstrx tests.\n"
10125 " -I,--include=<test-patter>\n"
10126 " Enable tests matching the given pattern.\n"
10127 " -X,--exclude=<test-patter>\n"
10128 " Skip tests matching the given pattern (overrides --include).\n"
10129 "\n"
10130 "Generation:\n"
10131 " -m, --common\n"
10132 " Enable generating common test data.\n"
10133 " -c, --only-cpu\n"
10134 " Enable generating CPU specific test data.\n"
10135 " -n, --number-of-test <count>\n"
10136 " Number of tests to generate. Default: %u\n"
10137 "\n"
10138 "Other:\n"
10139 " -v, --verbose\n"
10140 " -q, --quiet\n"
10141 " Noise level. Default: --quiet\n"
10142 " -Q, --quiet-skipping\n"
10143 " Don't display skipped tests.\n"
10144 "\n"
10145 "Tip! When working on a single instruction, use the the -I and -Q options to\n"
10146 " restrict the testing: %Rbn -tiQI \"shr_*\"\n"
10147 , argv[0], cDefaultTests, argv[0]);
10148 return RTEXITCODE_SUCCESS;
10149 default:
10150 return RTGetOptPrintError(rc, &ValueUnion);
10151 }
10152 }
10153
10154 static const struct
10155 {
10156 uint32_t fCategory;
10157 void (*pfnTest)(void);
10158#ifdef TSTIEMAIMPL_WITH_GENERATOR
10159 const char *pszFilenameFmt;
10160 RTEXITCODE (*pfnGenerate)(uint32_t cTests, const char * const *papszNameFmts);
10161 RTEXITCODE (*pfnDumpAll)(const char * const *papszNameFmts);
10162 uint32_t cMinTests;
10163# define GROUP_ENTRY(a_fCategory, a_BaseNm, a_szFilenameFmt, a_cMinTests) \
10164 { a_fCategory, a_BaseNm ## Test, a_szFilenameFmt, a_BaseNm ## Generate, a_BaseNm ## DumpAll, a_cMinTests }
10165#else
10166# define GROUP_ENTRY(a_fCategory, a_BaseNm, a_szFilenameFmt, a_cMinTests) \
10167 { a_fCategory, a_BaseNm ## Test }
10168#endif
10169#define GROUP_ENTRY_MANUAL(a_fCategory, a_BaseNm) \
10170 { a_fCategory, a_BaseNm ## Test }
10171 } s_aGroups[] =
10172 {
10173 GROUP_ENTRY(CATEGORY_INT, BinU8, "tstIEMAImplDataInt-%s.bin.gz", 0),
10174 GROUP_ENTRY(CATEGORY_INT, BinU16, "tstIEMAImplDataInt-%s.bin.gz", 0),
10175 GROUP_ENTRY(CATEGORY_INT, BinU32, "tstIEMAImplDataInt-%s.bin.gz", 0),
10176 GROUP_ENTRY(CATEGORY_INT, BinU64, "tstIEMAImplDataInt-%s.bin.gz", 0),
10177 GROUP_ENTRY(CATEGORY_INT, ShiftDbl, "tstIEMAImplDataInt-%s.bin.gz", 128),
10178 GROUP_ENTRY(CATEGORY_INT, Unary, "tstIEMAImplDataInt-%s.bin.gz", 0),
10179 GROUP_ENTRY(CATEGORY_INT, Shift, "tstIEMAImplDataInt-%s.bin.gz", 0),
10180 GROUP_ENTRY(CATEGORY_INT, MulDiv, "tstIEMAImplDataInt-%s.bin.gz", 0),
10181 GROUP_ENTRY_MANUAL(CATEGORY_INT, Xchg),
10182 GROUP_ENTRY_MANUAL(CATEGORY_INT, Xadd),
10183 GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg),
10184 GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg8b),
10185 GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg16b),
10186 GROUP_ENTRY_MANUAL(CATEGORY_INT, Bswap),
10187
10188 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdConst, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10189 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdInt, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10190 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdD80, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10191 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdMem, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 384), /* needs better coverage */
10192
10193 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStInt, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10194 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStD80, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10195 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStMem, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 384), /* needs better coverage */
10196
10197 GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10198 GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryFswR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10199 GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryEflR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10200
10201 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryR64, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10202 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryR32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10203 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryI32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10204 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryI16, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10205
10206 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswR64, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10207 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswR32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10208 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswI32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10209 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswI16, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10210
10211 GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10212 GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryFswR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10213 GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryTwoR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10214
10215 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10216 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10217 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryU128R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10218 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryU128R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10219
10220 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI32R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10221 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI64R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10222 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI32R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10223 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI64R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10224
10225 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64I32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10226 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64I64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10227 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32I32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10228 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32I64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10229
10230 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareEflR32R32, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10231 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareEflR64R64, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10232 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareF2XmmR32Imm8, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10233 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareF2XmmR64Imm8, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10234
10235 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmI32R32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10236 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR32I32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10237 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmI32R64, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10238 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR64I32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10239 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertMmXmm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10240 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR32Mm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10241 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR64Mm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10242 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertMmI32XmmR32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10243
10244 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpistri, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10245 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpistrm, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10246 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpestri, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10247 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpestrm, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10248 };
10249
10250 /*
10251 * Generate data?
10252 */
10253 if (enmMode == kModeGenerate)
10254 {
10255#ifdef TSTIEMAIMPL_WITH_GENERATOR
10256 if (cTests == 0)
10257 cTests = cDefaultTests;
10258 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
10259 g_cZeroSrcTests = g_cZeroDstTests * 2;
10260
10261 RTMpGetDescription(NIL_RTCPUID, g_szCpuDesc, sizeof(g_szCpuDesc));
10262
10263 /* For the revision, use the highest for this file and VBoxRT. */
10264 static const char s_szRev[] = "$Revision: 104269 $";
10265 const char *pszRev = s_szRev;
10266 while (*pszRev && !RT_C_IS_DIGIT(*pszRev))
10267 pszRev++;
10268 g_uSvnRev = RTStrToUInt32(pszRev);
10269 g_uSvnRev = RT_MAX(g_uSvnRev, RTBldCfgRevision());
10270
10271 /* Loop thru the groups and call the generate for any that's enabled. */
10272 for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
10273 if ((s_aGroups[i].fCategory & fCategories) && s_aGroups[i].pfnGenerate)
10274 {
10275 const char * const apszNameFmts[] =
10276 {
10277 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? s_aGroups[i].pszFilenameFmt : NULL,
10278 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10279 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10280 };
10281 RTEXITCODE rcExit = s_aGroups[i].pfnGenerate(RT_MAX(cTests, s_aGroups[i].cMinTests), apszNameFmts);
10282 if (rcExit != RTEXITCODE_SUCCESS)
10283 return rcExit;
10284 }
10285 return RTEXITCODE_SUCCESS;
10286#else
10287 return RTMsgErrorExitFailure("Test data generator not compiled in!");
10288#endif
10289 }
10290
10291 /*
10292 * Dump tables (used for the conversion, mostly useless now).
10293 */
10294 if (enmMode == kModeDump)
10295 {
10296#ifdef TSTIEMAIMPL_WITH_GENERATOR
10297 /* Loop thru the groups and call the generate for any that's enabled. */
10298 for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
10299 if ((s_aGroups[i].fCategory & fCategories) && s_aGroups[i].pfnDumpAll)
10300 {
10301 const char * const apszNameFmts[] =
10302 {
10303 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? s_aGroups[i].pszFilenameFmt : NULL,
10304 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10305 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10306 };
10307 RTEXITCODE rcExit = s_aGroups[i].pfnGenerate(RT_MAX(cTests, s_aGroups[i].cMinTests), apszNameFmts);
10308 if (rcExit != RTEXITCODE_SUCCESS)
10309 return rcExit;
10310 }
10311 return RTEXITCODE_SUCCESS;
10312#else
10313 return RTMsgErrorExitFailure("Test data generator not compiled in!");
10314#endif
10315 }
10316
10317
10318 /*
10319 * Do testing. Currently disabled by default as data needs to be checked
10320 * on both Intel and AMD systems first.
10321 */
10322 rc = RTTestCreate("tstIEMAImpl", &g_hTest);
10323 AssertRCReturn(rc, RTEXITCODE_FAILURE);
10324 if (enmMode == kModeTest)
10325 {
10326 RTTestBanner(g_hTest);
10327
10328 /* Allocate guarded memory for use in the tests. */
10329#define ALLOC_GUARDED_VAR(a_puVar) do { \
10330 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
10331 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
10332 } while (0)
10333 ALLOC_GUARDED_VAR(g_pu8);
10334 ALLOC_GUARDED_VAR(g_pu16);
10335 ALLOC_GUARDED_VAR(g_pu32);
10336 ALLOC_GUARDED_VAR(g_pu64);
10337 ALLOC_GUARDED_VAR(g_pu128);
10338 ALLOC_GUARDED_VAR(g_pu8Two);
10339 ALLOC_GUARDED_VAR(g_pu16Two);
10340 ALLOC_GUARDED_VAR(g_pu32Two);
10341 ALLOC_GUARDED_VAR(g_pu64Two);
10342 ALLOC_GUARDED_VAR(g_pu128Two);
10343 ALLOC_GUARDED_VAR(g_pfEfl);
10344 if (RTTestErrorCount(g_hTest) == 0)
10345 {
10346 /* Loop thru the groups and call test function for anything that's enabled. */
10347 for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
10348 if ((s_aGroups[i].fCategory & fCategories))
10349 s_aGroups[i].pfnTest();
10350 }
10351 return RTTestSummaryAndDestroy(g_hTest);
10352 }
10353 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
10354}
10355
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use