VirtualBox

source: vbox/trunk/src/VBox/Disassembler/DisasmFormatArmV8.cpp

Last change on this file was 101540, checked in by vboxsync, 7 months ago

DIS,VMM,DBGC,IPRT,++: Some disassembler tweaks and TB disassembly work. [build fix, missing bits] bugref:10371 bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.8 KB
Line 
1/* $Id: DisasmFormatArmV8.cpp 101540 2023-10-22 02:53:53Z vboxsync $ */
2/** @file
3 * VBox Disassembler - ARMv8 Style Formatter.
4 */
5
6/*
7 * Copyright (C) 2008-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include <VBox/dis.h>
33#include "DisasmInternal.h"
34#include <iprt/assert.h>
35#include <iprt/ctype.h>
36#include <iprt/errcore.h>
37#include <iprt/string.h>
38
39
40/*********************************************************************************************************************************
41* Global Variables *
42*********************************************************************************************************************************/
/**
 * Run of blanks used as the copy source for padding.
 *
 * The formatter copies up to 42 characters from this array when aligning the
 * trailing comment column (and up to 25 when aligning after left-hand opcode
 * bytes), so the string must never be shorter than that.  It holds 80 blanks.
 */
static const char g_szSpaces[] =
    "          " "          " "          " "          "
    "          " "          " "          " "          ";

/**
 * Names of the 32-bit general purpose registers, indexed by register number.
 *
 * Each entry is exactly four bytes; two character names are padded with an
 * explicit NUL so that byte 2 tells whether the name is two or three
 * characters long.
 *
 * NOTE(review): index 31 is spelled "zr" here while A64 assembly syntax uses
 * "wzr" - confirm whether the short spelling is intentional.
 */
static const char g_aszArmV8RegGen32[32][4] =
{
    "w0\0", "w1\0", "w2\0", "w3\0", "w4\0", "w5\0", "w6\0", "w7\0", "w8\0", "w9\0", "w10", "w11", "w12", "w13", "w14", "w15",
    "w16",  "w17",  "w18",  "w19",  "w20",  "w21",  "w22",  "w23",  "w24",  "w25",  "w26", "w27", "w28", "w29", "w30", "zr"
};

/**
 * Names of the 64-bit general purpose registers, indexed by register number.
 *
 * Same layout as g_aszArmV8RegGen32.
 *
 * NOTE(review): index 31 is spelled "zr" here while A64 assembly syntax uses
 * "xzr" - confirm whether the short spelling is intentional.
 */
static const char g_aszArmV8RegGen64[32][4] =
{
    "x0\0", "x1\0", "x2\0", "x3\0", "x4\0", "x5\0", "x6\0", "x7\0", "x8\0", "x9\0", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16",  "x17",  "x18",  "x19",  "x20",  "x21",  "x22",  "x23",  "x24",  "x25",  "x26", "x27", "x28", "x29", "x30", "zr"
};
55
56
57/**
58 * Gets the base register name for the given parameter.
59 *
60 * @returns Pointer to the register name.
61 * @param pDis The disassembler state.
62 * @param pParam The parameter.
63 * @param pcchReg Where to store the length of the name.
64 */
65static const char *disasmFormatArmV8Reg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
66{
67 RT_NOREF_PV(pDis);
68
69 switch (pParam->fUse & ( DISUSE_REG_GEN8 | DISUSE_REG_GEN16 | DISUSE_REG_GEN32 | DISUSE_REG_GEN64
70 | DISUSE_REG_FP | DISUSE_REG_MMX | DISUSE_REG_XMM | DISUSE_REG_YMM
71 | DISUSE_REG_CR | DISUSE_REG_DBG | DISUSE_REG_SEG | DISUSE_REG_TEST))
72
73 {
74 case DISUSE_REG_GEN32:
75 {
76 Assert(pParam->armv8.Reg.idxGenReg < RT_ELEMENTS(g_aszArmV8RegGen32));
77 const char *psz = g_aszArmV8RegGen32[pParam->armv8.Reg.idxGenReg];
78 *pcchReg = 2 + !!psz[2];
79 return psz;
80 }
81
82 case DISUSE_REG_GEN64:
83 {
84 Assert(pParam->armv8.Reg.idxGenReg < RT_ELEMENTS(g_aszArmV8RegGen64));
85 const char *psz = g_aszArmV8RegGen64[pParam->armv8.Reg.idxGenReg];
86 *pcchReg = 2 + !!psz[2];
87 return psz;
88 }
89
90 default:
91 AssertMsgFailed(("%#x\n", pParam->fUse));
92 *pcchReg = 3;
93 return "r??";
94 }
95}
96
97
98/**
99 * Formats the current instruction in Yasm (/ Nasm) style.
100 *
101 *
102 * @returns The number of output characters. If this is >= cchBuf, then the content
103 * of pszBuf will be truncated.
104 * @param pDis Pointer to the disassembler state.
105 * @param pszBuf The output buffer.
106 * @param cchBuf The size of the output buffer.
107 * @param fFlags Format flags, see DIS_FORMAT_FLAGS_*.
108 * @param pfnGetSymbol Get symbol name for a jmp or call target address. Optional.
109 * @param pvUser User argument for pfnGetSymbol.
110 */
111DISDECL(size_t) DISFormatArmV8Ex(PCDISSTATE pDis, char *pszBuf, size_t cchBuf, uint32_t fFlags,
112 PFNDISGETSYMBOL pfnGetSymbol, void *pvUser)
113{
114/** @todo monitor and mwait aren't formatted correctly in 64-bit mode. */
115 /*
116 * Input validation and massaging.
117 */
118 AssertPtr(pDis);
119 AssertPtrNull(pszBuf);
120 Assert(pszBuf || !cchBuf);
121 AssertPtrNull(pfnGetSymbol);
122 AssertMsg(DIS_FMT_FLAGS_IS_VALID(fFlags), ("%#x\n", fFlags));
123 if (fFlags & DIS_FMT_FLAGS_ADDR_COMMENT)
124 fFlags = (fFlags & ~DIS_FMT_FLAGS_ADDR_LEFT) | DIS_FMT_FLAGS_ADDR_RIGHT;
125 if (fFlags & DIS_FMT_FLAGS_BYTES_COMMENT)
126 fFlags = (fFlags & ~DIS_FMT_FLAGS_BYTES_LEFT) | DIS_FMT_FLAGS_BYTES_RIGHT;
127
128 PCDISOPCODE const pOp = pDis->pCurInstr;
129
130 /*
131 * Output macros
132 */
133 char *pszDst = pszBuf;
134 size_t cchDst = cchBuf;
135 size_t cchOutput = 0;
136#define PUT_C(ch) \
137 do { \
138 cchOutput++; \
139 if (cchDst > 1) \
140 { \
141 cchDst--; \
142 *pszDst++ = (ch); \
143 } \
144 } while (0)
145#define PUT_STR(pszSrc, cchSrc) \
146 do { \
147 cchOutput += (cchSrc); \
148 if (cchDst > (cchSrc)) \
149 { \
150 memcpy(pszDst, (pszSrc), (cchSrc)); \
151 pszDst += (cchSrc); \
152 cchDst -= (cchSrc); \
153 } \
154 else if (cchDst > 1) \
155 { \
156 memcpy(pszDst, (pszSrc), cchDst - 1); \
157 pszDst += cchDst - 1; \
158 cchDst = 1; \
159 } \
160 } while (0)
161#define PUT_SZ(sz) \
162 PUT_STR((sz), sizeof(sz) - 1)
163#define PUT_SZ_STRICT(szStrict, szRelaxed) \
164 do { if (fFlags & DIS_FMT_FLAGS_STRICT) PUT_SZ(szStrict); else PUT_SZ(szRelaxed); } while (0)
165#define PUT_PSZ(psz) \
166 do { const size_t cchTmp = strlen(psz); PUT_STR((psz), cchTmp); } while (0)
167#define PUT_NUM(cch, fmt, num) \
168 do { \
169 cchOutput += (cch); \
170 if (cchDst > 1) \
171 { \
172 const size_t cchTmp = RTStrPrintf(pszDst, cchDst, fmt, (num)); \
173 pszDst += cchTmp; \
174 cchDst -= cchTmp; \
175 Assert(cchTmp == (cch) || cchDst == 1); \
176 } \
177 } while (0)
178/** @todo add two flags for choosing between %X / %x and h / 0x. */
179#define PUT_NUM_8(num) PUT_NUM(4, "0x%02x", (uint8_t)(num))
180#define PUT_NUM_16(num) PUT_NUM(6, "0x%04x", (uint16_t)(num))
181#define PUT_NUM_32(num) PUT_NUM(10, "0x%08x", (uint32_t)(num))
182#define PUT_NUM_64(num) PUT_NUM(18, "0x%016RX64", (uint64_t)(num))
183
184#define PUT_NUM_SIGN(cch, fmt, num, stype, utype) \
185 do { \
186 if ((stype)(num) >= 0) \
187 { \
188 PUT_C('+'); \
189 PUT_NUM(cch, fmt, (utype)(num)); \
190 } \
191 else \
192 { \
193 PUT_C('-'); \
194 PUT_NUM(cch, fmt, (utype)-(stype)(num)); \
195 } \
196 } while (0)
197#define PUT_NUM_S8(num) PUT_NUM_SIGN(4, "0x%02x", num, int8_t, uint8_t)
198#define PUT_NUM_S16(num) PUT_NUM_SIGN(6, "0x%04x", num, int16_t, uint16_t)
199#define PUT_NUM_S32(num) PUT_NUM_SIGN(10, "0x%08x", num, int32_t, uint32_t)
200#define PUT_NUM_S64(num) PUT_NUM_SIGN(18, "0x%016RX64", num, int64_t, uint64_t)
201
202#define PUT_SYMBOL_TWO(a_rcSym, a_szStart, a_chEnd) \
203 do { \
204 if (RT_SUCCESS(a_rcSym)) \
205 { \
206 PUT_SZ(a_szStart); \
207 PUT_PSZ(szSymbol); \
208 if (off != 0) \
209 { \
210 if ((int8_t)off == off) \
211 PUT_NUM_S8(off); \
212 else if ((int16_t)off == off) \
213 PUT_NUM_S16(off); \
214 else if ((int32_t)off == off) \
215 PUT_NUM_S32(off); \
216 else \
217 PUT_NUM_S64(off); \
218 } \
219 PUT_C(a_chEnd); \
220 } \
221 } while (0)
222
223#define PUT_SYMBOL(a_uSeg, a_uAddr, a_szStart, a_chEnd) \
224 do { \
225 if (pfnGetSymbol) \
226 { \
227 int rcSym = pfnGetSymbol(pDis, a_uSeg, a_uAddr, szSymbol, sizeof(szSymbol), &off, pvUser); \
228 PUT_SYMBOL_TWO(rcSym, a_szStart, a_chEnd); \
229 } \
230 } while (0)
231
232
233 /*
234 * The address?
235 */
236 if (fFlags & DIS_FMT_FLAGS_ADDR_LEFT)
237 {
238#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
239 if (pDis->uInstrAddr >= _4G)
240 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
241#endif
242 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
243 PUT_C(' ');
244 }
245
246 /*
247 * The opcode bytes?
248 */
249 if (fFlags & DIS_FMT_FLAGS_BYTES_LEFT)
250 {
251 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
252 cchOutput += cchTmp;
253 if (cchDst > 1)
254 {
255 if (cchTmp <= cchDst)
256 {
257 cchDst -= cchTmp;
258 pszDst += cchTmp;
259 }
260 else
261 {
262 pszDst += cchDst - 1;
263 cchDst = 1;
264 }
265 }
266
267 /* Some padding to align the instruction. */
268 size_t cchPadding = (7 * (2 + !!(fFlags & DIS_FMT_FLAGS_BYTES_SPACED)))
269 + !!(fFlags & DIS_FMT_FLAGS_BYTES_BRACKETS) * 2
270 + 2;
271 cchPadding = cchTmp + 1 >= cchPadding ? 1 : cchPadding - cchTmp;
272 PUT_STR(g_szSpaces, cchPadding);
273 }
274
275
276 /*
277 * Filter out invalid opcodes first as they need special
278 * treatment. UDF is an exception and should be handled normally.
279 */
280 size_t const offInstruction = cchOutput;
281 if (pOp->uOpcode == OP_INVALID)
282 PUT_SZ("Illegal opcode");
283 else
284 {
285 /*
286 * Formatting context and associated macros.
287 */
288 PCDISOPPARAM pParam = &pDis->Param1;
289 int iParam = 1;
290
291 const char *pszFmt = pOp->pszOpcode;
292
293 /*
294 * The formatting loop.
295 */
296 RTINTPTR off;
297 char szSymbol[128];
298 char ch;
299 while ((ch = *pszFmt++) != '\0')
300 {
301 if (ch == '%')
302 {
303 ch = *pszFmt++;
304 switch (ch)
305 {
306 case 'I': /* Immediate data. */
307 PUT_C('#');
308 switch (pParam->fUse & ( DISUSE_IMMEDIATE8 | DISUSE_IMMEDIATE16 | DISUSE_IMMEDIATE32 | DISUSE_IMMEDIATE64
309 | DISUSE_IMMEDIATE16_SX8 | DISUSE_IMMEDIATE32_SX8 | DISUSE_IMMEDIATE64_SX8))
310 {
311 case DISUSE_IMMEDIATE8:
312 PUT_NUM_8(pParam->uValue);
313 break;
314 case DISUSE_IMMEDIATE16:
315 PUT_NUM_16(pParam->uValue);
316 break;
317 case DISUSE_IMMEDIATE16_SX8:
318 PUT_NUM_16(pParam->uValue);
319 break;
320 case DISUSE_IMMEDIATE32:
321 PUT_NUM_32(pParam->uValue);
322 /** @todo Symbols */
323 break;
324 case DISUSE_IMMEDIATE32_SX8:
325 PUT_NUM_32(pParam->uValue);
326 break;
327 case DISUSE_IMMEDIATE64_SX8:
328 PUT_NUM_64(pParam->uValue);
329 break;
330 case DISUSE_IMMEDIATE64:
331 PUT_NUM_64(pParam->uValue);
332 /** @todo Symbols */
333 break;
334 default:
335 AssertFailed();
336 break;
337 }
338 break;
339
340 case 'X': /* Register. */
341 {
342 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
343 Assert(!(pParam->fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)));
344
345 size_t cchReg;
346 const char *pszReg = disasmFormatArmV8Reg(pDis, pParam, &cchReg);
347 PUT_STR(pszReg, cchReg);
348 break;
349 }
350
351 case 'J': /* Relative jump offset (ParseImmBRel + ParseImmVRel). */
352 {
353 int32_t offDisplacement;
354
355 PUT_C('#');
356 if (pParam->fUse & DISUSE_IMMEDIATE8_REL)
357 {
358 offDisplacement = (int8_t)pParam->uValue;
359 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
360 PUT_NUM_S8(offDisplacement);
361 }
362 else if (pParam->fUse & DISUSE_IMMEDIATE16_REL)
363 {
364 offDisplacement = (int16_t)pParam->uValue;
365 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
366 PUT_NUM_S16(offDisplacement);
367 }
368 else
369 {
370 offDisplacement = (int32_t)pParam->uValue;
371 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
372 PUT_NUM_S32(offDisplacement);
373 }
374 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
375 PUT_SZ(" (");
376
377 RTUINTPTR uTrgAddr = pDis->uInstrAddr + pDis->cbInstr + offDisplacement;
378 if ( pDis->uCpuMode == DISCPUMODE_ARMV8_A32
379 || pDis->uCpuMode == DISCPUMODE_ARMV8_T32)
380 PUT_NUM_32(uTrgAddr);
381 else if (pDis->uCpuMode == DISCPUMODE_ARMV8_A64)
382 PUT_NUM_64(uTrgAddr);
383 else
384 AssertReleaseFailed();
385
386 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
387 {
388 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " = ", ' ');
389 PUT_C(')');
390 }
391 else
392 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " (", ')');
393 break;
394 }
395
396 case 'C': /* Conditional */
397 {
398 /** @todo */
399 /* Skip any whitespace coming after (as this is not really part of the parameters). */
400 while (*pszFmt == ' ')
401 pszFmt++;
402
403 switch (++iParam)
404 {
405 case 2: pParam = &pDis->Param2; break;
406 case 3: pParam = &pDis->Param3; break;
407 case 4: pParam = &pDis->Param4; break;
408 default: pParam = NULL; break;
409 }
410 break;
411 }
412
413 default:
414 AssertMsgFailed(("%c%s!\n", ch, pszFmt));
415 break;
416 }
417 AssertMsg(*pszFmt == ',' || *pszFmt == '\0' || *pszFmt == '%', ("%c%s\n", ch, pszFmt));
418 }
419 else
420 {
421 PUT_C(ch);
422 if (ch == ',')
423 {
424 Assert(*pszFmt != ' ');
425 PUT_C(' ');
426 switch (++iParam)
427 {
428 case 2: pParam = &pDis->Param2; break;
429 case 3: pParam = &pDis->Param3; break;
430 case 4: pParam = &pDis->Param4; break;
431 default: pParam = NULL; break;
432 }
433 }
434 }
435 } /* while more to format */
436 }
437
438 /*
439 * Any additional output to the right of the instruction?
440 */
441 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
442 {
443 /* some up front padding. */
444 size_t cchPadding = cchOutput - offInstruction;
445 cchPadding = cchPadding + 1 >= 42 ? 1 : 42 - cchPadding;
446 PUT_STR(g_szSpaces, cchPadding);
447
448 /* comment? */
449 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
450 PUT_SZ(";");
451
452 /*
453 * The address?
454 */
455 if (fFlags & DIS_FMT_FLAGS_ADDR_RIGHT)
456 {
457 PUT_C(' ');
458#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
459 if (pDis->uInstrAddr >= _4G)
460 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
461#endif
462 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
463 }
464
465 /*
466 * Opcode bytes?
467 */
468 if (fFlags & DIS_FMT_FLAGS_BYTES_RIGHT)
469 {
470 PUT_C(' ');
471 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
472 cchOutput += cchTmp;
473 if (cchTmp >= cchDst)
474 cchTmp = cchDst - (cchDst != 0);
475 cchDst -= cchTmp;
476 pszDst += cchTmp;
477 }
478 }
479
480 /*
481 * Terminate it - on overflow we'll have reserved one byte for this.
482 */
483 if (cchDst > 0)
484 *pszDst = '\0';
485 else
486 Assert(!cchBuf);
487
488 /* clean up macros */
489#undef PUT_PSZ
490#undef PUT_SZ
491#undef PUT_STR
492#undef PUT_C
493 return cchOutput;
494}
495
496
497/**
498 * Formats the current instruction in Yasm (/ Nasm) style.
499 *
500 * This is a simplified version of DISFormatYasmEx() provided for your convenience.
501 *
502 *
503 * @returns The number of output characters. If this is >= cchBuf, then the content
504 * of pszBuf will be truncated.
505 * @param pDis Pointer to the disassembler state.
506 * @param pszBuf The output buffer.
507 * @param cchBuf The size of the output buffer.
508 */
509DISDECL(size_t) DISFormatArmV8(PCDISSTATE pDis, char *pszBuf, size_t cchBuf)
510{
511 return DISFormatArmV8Ex(pDis, pszBuf, cchBuf, 0 /* fFlags */, NULL /* pfnGetSymbol */, NULL /* pvUser */);
512}
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact · Privacy policy · Terms of Use