VirtualBox

source: vbox/trunk/src/VBox/Disassembler/DisasmFormatArmV8.cpp@ 101381

Last change on this file since 101381 was 99321, checked in by vboxsync, 14 months ago

Disassembler: Continue work on the ARMv8 disassembler, defining the instruction table layout and intermediate structures for decoding, bugref:10394 [scm fix]

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.8 KB
Line 
1/* $Id: DisasmFormatArmV8.cpp 99321 2023-04-06 19:36:23Z vboxsync $ */
2/** @file
3 * VBox Disassembler - ARMv8 Style Formatter.
4 */
5
6/*
7 * Copyright (C) 2008-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include <VBox/dis.h>
33#include "DisasmInternal.h"
34#include <iprt/assert.h>
35#include <iprt/ctype.h>
36#include <iprt/errcore.h>
37#include <iprt/string.h>
38
39
40/*********************************************************************************************************************************
41* Global Variables *
42*********************************************************************************************************************************/
/** Run of blanks used as the copy source when padding output columns.
 * It must be at least as long as the largest padding request made by the
 * formatter (42 characters for the right-hand comment column), because the
 * padding is produced by copying a prefix of this string. */
static const char g_szSpaces[] =
    "          " "          " "          " "          "
    "          " "          " "          " "          ";
/** Names of the 32-bit general purpose registers, indexed by register number.
 * Two-character names are padded with an explicit NUL so that the third byte
 * tells a two-character name from a three-character one.  Index 31 is the
 * 32-bit zero register. */
static const char g_aszArmV8RegGen32[32][4] =
{
    "w0\0", "w1\0", "w2\0", "w3\0", "w4\0", "w5\0", "w6\0", "w7\0",
    "w8\0", "w9\0", "w10",  "w11",  "w12",  "w13",  "w14",  "w15",
    "w16",  "w17",  "w18",  "w19",  "w20",  "w21",  "w22",  "w23",
    "w24",  "w25",  "w26",  "w27",  "w28",  "w29",  "w30",  "wzr"
};
/** Names of the 64-bit general purpose registers, indexed by register number.
 * Same layout as the 32-bit table.  Index 31 is the 64-bit zero register. */
static const char g_aszArmV8RegGen64[32][4] =
{
    "x0\0", "x1\0", "x2\0", "x3\0", "x4\0", "x5\0", "x6\0", "x7\0",
    "x8\0", "x9\0", "x10",  "x11",  "x12",  "x13",  "x14",  "x15",
    "x16",  "x17",  "x18",  "x19",  "x20",  "x21",  "x22",  "x23",
    "x24",  "x25",  "x26",  "x27",  "x28",  "x29",  "x30",  "xzr"
};
55
56
57/**
58 * Gets the base register name for the given parameter.
59 *
60 * @returns Pointer to the register name.
61 * @param pDis The disassembler state.
62 * @param pParam The parameter.
63 * @param pcchReg Where to store the length of the name.
64 */
65static const char *disasmFormatArmV8Reg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
66{
67 RT_NOREF_PV(pDis);
68
69 switch (pParam->fUse & ( DISUSE_REG_GEN8 | DISUSE_REG_GEN16 | DISUSE_REG_GEN32 | DISUSE_REG_GEN64
70 | DISUSE_REG_FP | DISUSE_REG_MMX | DISUSE_REG_XMM | DISUSE_REG_YMM
71 | DISUSE_REG_CR | DISUSE_REG_DBG | DISUSE_REG_SEG | DISUSE_REG_TEST))
72
73 {
74 case DISUSE_REG_GEN32:
75 {
76 Assert(pParam->arch.armv8.Reg.idxGenReg < RT_ELEMENTS(g_aszArmV8RegGen32));
77 const char *psz = g_aszArmV8RegGen32[pParam->arch.armv8.Reg.idxGenReg];
78 *pcchReg = 2 + !!psz[2];
79 return psz;
80 }
81
82 case DISUSE_REG_GEN64:
83 {
84 Assert(pParam->arch.armv8.Reg.idxGenReg < RT_ELEMENTS(g_aszArmV8RegGen64));
85 const char *psz = g_aszArmV8RegGen64[pParam->arch.armv8.Reg.idxGenReg];
86 *pcchReg = 2 + !!psz[2];
87 return psz;
88 return psz;
89 }
90
91 default:
92 AssertMsgFailed(("%#x\n", pParam->fUse));
93 *pcchReg = 3;
94 return "r??";
95 }
96}
97
98
99/**
100 * Formats the current instruction in Yasm (/ Nasm) style.
101 *
102 *
103 * @returns The number of output characters. If this is >= cchBuf, then the content
104 * of pszBuf will be truncated.
105 * @param pDis Pointer to the disassembler state.
106 * @param pszBuf The output buffer.
107 * @param cchBuf The size of the output buffer.
108 * @param fFlags Format flags, see DIS_FORMAT_FLAGS_*.
109 * @param pfnGetSymbol Get symbol name for a jmp or call target address. Optional.
110 * @param pvUser User argument for pfnGetSymbol.
111 */
112DISDECL(size_t) DISFormatArmV8Ex(PCDISSTATE pDis, char *pszBuf, size_t cchBuf, uint32_t fFlags,
113 PFNDISGETSYMBOL pfnGetSymbol, void *pvUser)
114{
115/** @todo monitor and mwait aren't formatted correctly in 64-bit mode. */
116 /*
117 * Input validation and massaging.
118 */
119 AssertPtr(pDis);
120 AssertPtrNull(pszBuf);
121 Assert(pszBuf || !cchBuf);
122 AssertPtrNull(pfnGetSymbol);
123 AssertMsg(DIS_FMT_FLAGS_IS_VALID(fFlags), ("%#x\n", fFlags));
124 if (fFlags & DIS_FMT_FLAGS_ADDR_COMMENT)
125 fFlags = (fFlags & ~DIS_FMT_FLAGS_ADDR_LEFT) | DIS_FMT_FLAGS_ADDR_RIGHT;
126 if (fFlags & DIS_FMT_FLAGS_BYTES_COMMENT)
127 fFlags = (fFlags & ~DIS_FMT_FLAGS_BYTES_LEFT) | DIS_FMT_FLAGS_BYTES_RIGHT;
128
129 PCDISOPCODE const pOp = pDis->pCurInstr;
130
131 /*
132 * Output macros
133 */
134 char *pszDst = pszBuf;
135 size_t cchDst = cchBuf;
136 size_t cchOutput = 0;
137#define PUT_C(ch) \
138 do { \
139 cchOutput++; \
140 if (cchDst > 1) \
141 { \
142 cchDst--; \
143 *pszDst++ = (ch); \
144 } \
145 } while (0)
146#define PUT_STR(pszSrc, cchSrc) \
147 do { \
148 cchOutput += (cchSrc); \
149 if (cchDst > (cchSrc)) \
150 { \
151 memcpy(pszDst, (pszSrc), (cchSrc)); \
152 pszDst += (cchSrc); \
153 cchDst -= (cchSrc); \
154 } \
155 else if (cchDst > 1) \
156 { \
157 memcpy(pszDst, (pszSrc), cchDst - 1); \
158 pszDst += cchDst - 1; \
159 cchDst = 1; \
160 } \
161 } while (0)
162#define PUT_SZ(sz) \
163 PUT_STR((sz), sizeof(sz) - 1)
164#define PUT_SZ_STRICT(szStrict, szRelaxed) \
165 do { if (fFlags & DIS_FMT_FLAGS_STRICT) PUT_SZ(szStrict); else PUT_SZ(szRelaxed); } while (0)
166#define PUT_PSZ(psz) \
167 do { const size_t cchTmp = strlen(psz); PUT_STR((psz), cchTmp); } while (0)
168#define PUT_NUM(cch, fmt, num) \
169 do { \
170 cchOutput += (cch); \
171 if (cchDst > 1) \
172 { \
173 const size_t cchTmp = RTStrPrintf(pszDst, cchDst, fmt, (num)); \
174 pszDst += cchTmp; \
175 cchDst -= cchTmp; \
176 Assert(cchTmp == (cch) || cchDst == 1); \
177 } \
178 } while (0)
179/** @todo add two flags for choosing between %X / %x and h / 0x. */
180#define PUT_NUM_8(num) PUT_NUM(4, "0x%02x", (uint8_t)(num))
181#define PUT_NUM_16(num) PUT_NUM(6, "0x%04x", (uint16_t)(num))
182#define PUT_NUM_32(num) PUT_NUM(10, "0x%08x", (uint32_t)(num))
183#define PUT_NUM_64(num) PUT_NUM(18, "0x%016RX64", (uint64_t)(num))
184
185#define PUT_NUM_SIGN(cch, fmt, num, stype, utype) \
186 do { \
187 if ((stype)(num) >= 0) \
188 { \
189 PUT_C('+'); \
190 PUT_NUM(cch, fmt, (utype)(num)); \
191 } \
192 else \
193 { \
194 PUT_C('-'); \
195 PUT_NUM(cch, fmt, (utype)-(stype)(num)); \
196 } \
197 } while (0)
198#define PUT_NUM_S8(num) PUT_NUM_SIGN(4, "0x%02x", num, int8_t, uint8_t)
199#define PUT_NUM_S16(num) PUT_NUM_SIGN(6, "0x%04x", num, int16_t, uint16_t)
200#define PUT_NUM_S32(num) PUT_NUM_SIGN(10, "0x%08x", num, int32_t, uint32_t)
201#define PUT_NUM_S64(num) PUT_NUM_SIGN(18, "0x%016RX64", num, int64_t, uint64_t)
202
203#define PUT_SYMBOL_TWO(a_rcSym, a_szStart, a_chEnd) \
204 do { \
205 if (RT_SUCCESS(a_rcSym)) \
206 { \
207 PUT_SZ(a_szStart); \
208 PUT_PSZ(szSymbol); \
209 if (off != 0) \
210 { \
211 if ((int8_t)off == off) \
212 PUT_NUM_S8(off); \
213 else if ((int16_t)off == off) \
214 PUT_NUM_S16(off); \
215 else if ((int32_t)off == off) \
216 PUT_NUM_S32(off); \
217 else \
218 PUT_NUM_S64(off); \
219 } \
220 PUT_C(a_chEnd); \
221 } \
222 } while (0)
223
224#define PUT_SYMBOL(a_uSeg, a_uAddr, a_szStart, a_chEnd) \
225 do { \
226 if (pfnGetSymbol) \
227 { \
228 int rcSym = pfnGetSymbol(pDis, a_uSeg, a_uAddr, szSymbol, sizeof(szSymbol), &off, pvUser); \
229 PUT_SYMBOL_TWO(rcSym, a_szStart, a_chEnd); \
230 } \
231 } while (0)
232
233
234 /*
235 * The address?
236 */
237 if (fFlags & DIS_FMT_FLAGS_ADDR_LEFT)
238 {
239#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
240 if (pDis->uInstrAddr >= _4G)
241 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
242#endif
243 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
244 PUT_C(' ');
245 }
246
247 /*
248 * The opcode bytes?
249 */
250 if (fFlags & DIS_FMT_FLAGS_BYTES_LEFT)
251 {
252 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
253 cchOutput += cchTmp;
254 if (cchDst > 1)
255 {
256 if (cchTmp <= cchDst)
257 {
258 cchDst -= cchTmp;
259 pszDst += cchTmp;
260 }
261 else
262 {
263 pszDst += cchDst - 1;
264 cchDst = 1;
265 }
266 }
267
268 /* Some padding to align the instruction. */
269 size_t cchPadding = (7 * (2 + !!(fFlags & DIS_FMT_FLAGS_BYTES_SPACED)))
270 + !!(fFlags & DIS_FMT_FLAGS_BYTES_BRACKETS) * 2
271 + 2;
272 cchPadding = cchTmp + 1 >= cchPadding ? 1 : cchPadding - cchTmp;
273 PUT_STR(g_szSpaces, cchPadding);
274 }
275
276
277 /*
278 * Filter out invalid opcodes first as they need special
279 * treatment. UDF is an exception and should be handled normally.
280 */
281 size_t const offInstruction = cchOutput;
282 if (pOp->uOpcode == OP_INVALID)
283 PUT_SZ("Illegal opcode");
284 else
285 {
286 /*
287 * Formatting context and associated macros.
288 */
289 PCDISOPPARAM pParam = &pDis->Param1;
290 int iParam = 1;
291
292 const char *pszFmt = pOp->pszOpcode;
293
294 /*
295 * The formatting loop.
296 */
297 RTINTPTR off;
298 char szSymbol[128];
299 char ch;
300 while ((ch = *pszFmt++) != '\0')
301 {
302 if (ch == '%')
303 {
304 ch = *pszFmt++;
305 switch (ch)
306 {
307 case 'I': /* Immediate data. */
308 PUT_C('#');
309 switch (pParam->fUse & ( DISUSE_IMMEDIATE8 | DISUSE_IMMEDIATE16 | DISUSE_IMMEDIATE32 | DISUSE_IMMEDIATE64
310 | DISUSE_IMMEDIATE16_SX8 | DISUSE_IMMEDIATE32_SX8 | DISUSE_IMMEDIATE64_SX8))
311 {
312 case DISUSE_IMMEDIATE8:
313 PUT_NUM_8(pParam->uValue);
314 break;
315 case DISUSE_IMMEDIATE16:
316 PUT_NUM_16(pParam->uValue);
317 break;
318 case DISUSE_IMMEDIATE16_SX8:
319 PUT_NUM_16(pParam->uValue);
320 break;
321 case DISUSE_IMMEDIATE32:
322 PUT_NUM_32(pParam->uValue);
323 /** @todo Symbols */
324 break;
325 case DISUSE_IMMEDIATE32_SX8:
326 PUT_NUM_32(pParam->uValue);
327 break;
328 case DISUSE_IMMEDIATE64_SX8:
329 PUT_NUM_64(pParam->uValue);
330 break;
331 case DISUSE_IMMEDIATE64:
332 PUT_NUM_64(pParam->uValue);
333 /** @todo Symbols */
334 break;
335 default:
336 AssertFailed();
337 break;
338 }
339 break;
340
341 case 'X': /* Register. */
342 {
343 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
344 Assert(!(pParam->fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)));
345
346 size_t cchReg;
347 const char *pszReg = disasmFormatArmV8Reg(pDis, pParam, &cchReg);
348 PUT_STR(pszReg, cchReg);
349 break;
350 }
351
352 case 'J': /* Relative jump offset (ParseImmBRel + ParseImmVRel). */
353 {
354 int32_t offDisplacement;
355
356 PUT_C('#');
357 if (pParam->fUse & DISUSE_IMMEDIATE8_REL)
358 {
359 offDisplacement = (int8_t)pParam->uValue;
360 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
361 PUT_NUM_S8(offDisplacement);
362 }
363 else if (pParam->fUse & DISUSE_IMMEDIATE16_REL)
364 {
365 offDisplacement = (int16_t)pParam->uValue;
366 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
367 PUT_NUM_S16(offDisplacement);
368 }
369 else
370 {
371 offDisplacement = (int32_t)pParam->uValue;
372 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
373 PUT_NUM_S32(offDisplacement);
374 }
375 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
376 PUT_SZ(" (");
377
378 RTUINTPTR uTrgAddr = pDis->uInstrAddr + pDis->cbInstr + offDisplacement;
379 if ( pDis->uCpuMode == DISCPUMODE_ARMV8_A32
380 || pDis->uCpuMode == DISCPUMODE_ARMV8_T32)
381 PUT_NUM_32(uTrgAddr);
382 else if (pDis->uCpuMode == DISCPUMODE_ARMV8_A64)
383 PUT_NUM_64(uTrgAddr);
384 else
385 AssertReleaseFailed();
386
387 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
388 {
389 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " = ", ' ');
390 PUT_C(')');
391 }
392 else
393 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " (", ')');
394 break;
395 }
396
397 case 'C': /* Conditional */
398 {
399 /** @todo */
400 /* Skip any whitespace coming after (as this is not really part of the parameters). */
401 while (*pszFmt == ' ')
402 pszFmt++;
403
404 switch (++iParam)
405 {
406 case 2: pParam = &pDis->Param2; break;
407 case 3: pParam = &pDis->Param3; break;
408 case 4: pParam = &pDis->Param4; break;
409 default: pParam = NULL; break;
410 }
411 break;
412 }
413
414 default:
415 AssertMsgFailed(("%c%s!\n", ch, pszFmt));
416 break;
417 }
418 AssertMsg(*pszFmt == ',' || *pszFmt == '\0' || *pszFmt == '%', ("%c%s\n", ch, pszFmt));
419 }
420 else
421 {
422 PUT_C(ch);
423 if (ch == ',')
424 {
425 Assert(*pszFmt != ' ');
426 PUT_C(' ');
427 switch (++iParam)
428 {
429 case 2: pParam = &pDis->Param2; break;
430 case 3: pParam = &pDis->Param3; break;
431 case 4: pParam = &pDis->Param4; break;
432 default: pParam = NULL; break;
433 }
434 }
435 }
436 } /* while more to format */
437 }
438
439 /*
440 * Any additional output to the right of the instruction?
441 */
442 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
443 {
444 /* some up front padding. */
445 size_t cchPadding = cchOutput - offInstruction;
446 cchPadding = cchPadding + 1 >= 42 ? 1 : 42 - cchPadding;
447 PUT_STR(g_szSpaces, cchPadding);
448
449 /* comment? */
450 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
451 PUT_SZ(";");
452
453 /*
454 * The address?
455 */
456 if (fFlags & DIS_FMT_FLAGS_ADDR_RIGHT)
457 {
458 PUT_C(' ');
459#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
460 if (pDis->uInstrAddr >= _4G)
461 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
462#endif
463 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
464 }
465
466 /*
467 * Opcode bytes?
468 */
469 if (fFlags & DIS_FMT_FLAGS_BYTES_RIGHT)
470 {
471 PUT_C(' ');
472 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
473 cchOutput += cchTmp;
474 if (cchTmp >= cchDst)
475 cchTmp = cchDst - (cchDst != 0);
476 cchDst -= cchTmp;
477 pszDst += cchTmp;
478 }
479 }
480
481 /*
482 * Terminate it - on overflow we'll have reserved one byte for this.
483 */
484 if (cchDst > 0)
485 *pszDst = '\0';
486 else
487 Assert(!cchBuf);
488
489 /* clean up macros */
490#undef PUT_PSZ
491#undef PUT_SZ
492#undef PUT_STR
493#undef PUT_C
494 return cchOutput;
495}
496
497
498/**
499 * Formats the current instruction in Yasm (/ Nasm) style.
500 *
501 * This is a simplified version of DISFormatYasmEx() provided for your convenience.
502 *
503 *
504 * @returns The number of output characters. If this is >= cchBuf, then the content
505 * of pszBuf will be truncated.
506 * @param pDis Pointer to the disassembler state.
507 * @param pszBuf The output buffer.
508 * @param cchBuf The size of the output buffer.
509 */
510DISDECL(size_t) DISFormatArmV8(PCDISSTATE pDis, char *pszBuf, size_t cchBuf)
511{
512 return DISFormatArmV8Ex(pDis, pszBuf, cchBuf, 0 /* fFlags */, NULL /* pfnGetSymbol */, NULL /* pvUser */);
513}
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use